[clang] Avoid need for SLocEntryLoaded BitVector (PR #67960)

via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 2 01:37:22 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

<details>
<summary>Changes</summary>

The BitVector is currently used to keep track of which entries of LoadedSLocEntryTable have been loaded. Such information can be stored in the SLocEntry itself. Moreover, thanks to the fact that LoadedSLocEntryTable is now a PagedVector, we can simply consider elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.

---
Full diff: https://github.com/llvm/llvm-project/pull/67960.diff


3 Files Affected:

- (modified) clang/include/clang/Basic/SourceManager.h (+7-9) 
- (modified) clang/lib/Basic/SourceManager.cpp (+16-11) 
- (modified) llvm/include/llvm/ADT/PagedVector.h (+6) 


``````````diff
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
     FileInfo File;
     ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo &getFile() const {
     assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-      1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+      1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public RefCountedBase<SourceManager> {
   const SrcMgr::SLocEntry &getLoadedSLocEntry(unsigned Index,
                                               bool *Invalid = nullptr) const {
     assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-    if (SLocEntryLoaded[Index])
+    if (LoadedSLocEntryTable.isMaterialized(Index) &&
+        LoadedSLocEntryTable[Index].isLoaded())
       return LoadedSLocEntryTable[Index];
     return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager &Old) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-    if (!Old.SLocEntryLoaded[I])
+    if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+        !Old.LoadedSLocEntryTable[I].isLoaded())
       Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache &SourceManager::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry &SourceManager::loadSLocEntry(unsigned Index,
                                                       bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+         !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast<int>(Index) + 2))) {
     if (Invalid)
       *Invalid = true;
     // If the file of the SLocEntry changed we could still have loaded it.
-    if (!SLocEntryLoaded[Index]) {
+    if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+        !LoadedSLocEntryTable[Index].isLoaded()) {
       // Try to recover; create a SLocEntry so the rest of clang can handle it.
       if (!FakeSLocEntryForRecovery)
         FakeSLocEntryForRecovery = std::make_unique<SLocEntry>(SLocEntry::get(
@@ -458,7 +460,6 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
     return std::make_pair(0, 0);
   }
   LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
-  SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
   CurrentLoadedOffset -= TotalSize;
   int ID = LoadedSLocEntryTable.size();
   return std::make_pair(-ID - 1, CurrentLoadedOffset);
@@ -604,10 +605,12 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
     assert(LoadedID != -1 && "Loading sentinel FileID");
     unsigned Index = unsigned(-LoadedID) - 2;
     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
-    assert(!SLocEntryLoaded[Index] && "FileID already loaded");
+    assert((!LoadedSLocEntryTable.isMaterialized(Index) ||
+            !LoadedSLocEntryTable[Index].isLoaded()) &&
+           "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(
         LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
-    SLocEntryLoaded[Index] = true;
+    LoadedSLocEntryTable[Index].setLoaded(true);
     return FileID::get(LoadedID);
   }
   unsigned FileSize = File.getSize();
@@ -665,9 +668,11 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
     assert(LoadedID != -1 && "Loading sentinel FileID");
     unsigned Index = unsigned(-LoadedID) - 2;
     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
-    assert(!SLocEntryLoaded[Index] && "FileID already loaded");
+    assert((!LoadedSLocEntryTable.isMaterialized(Index) ||
+            !LoadedSLocEntryTable[Index].isLoaded()) &&
+           "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
-    SLocEntryLoaded[Index] = true;
+    LoadedSLocEntryTable[Index].setLoaded(true);
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
@@ -2223,7 +2228,8 @@ LLVM_DUMP_METHOD void SourceManager::dump() const {
   std::optional<SourceLocation::UIntTy> NextStart;
   for (unsigned Index = 0; Index != LoadedSLocEntryTable.size(); ++Index) {
     int ID = -(int)Index - 2;
-    if (SLocEntryLoaded[Index]) {
+    if (LoadedSLocEntryTable.isMaterialized(Index) &&
+        LoadedSLocEntryTable[Index].isLoaded()) {
       DumpSLocEntry(ID, LoadedSLocEntryTable[Index], NextStart);
       NextStart = LoadedSLocEntryTable[Index].getOffset();
     } else {
@@ -2346,7 +2352,6 @@ size_t SourceManager::getDataStructureSizes() const {
   size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
                 llvm::capacity_in_bytes(LocalSLocEntryTable) +
                 llvm::capacity_in_bytes(LoadedSLocEntryTable) +
-                llvm::capacity_in_bytes(SLocEntryLoaded) +
                 llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 667bece6d718385..dfae3a667360f3a 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -103,6 +103,12 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  [[nodiscard]] bool isMaterialized(size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PageSize < PageToDataPtrs.size());
+    return PageToDataPtrs[Index / PageSize] != nullptr;
+  }
+
   /// Resize the vector. Notice that the constructor of the elements will not
   /// be invoked until an element of a given page is accessed, at which point
   /// all the elements of the page will be constructed.

``````````

</details>


https://github.com/llvm/llvm-project/pull/67960


More information about the cfe-commits mailing list