[llvm] Avoid need for SLocEntryLoaded BitVector (PR #67960)

Giulio Eulisse via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 2 08:00:36 PDT 2023


https://github.com/ktf updated https://github.com/llvm/llvm-project/pull/67960

>From 9fde224de6baa5b1fb3713d810ce835d4456b457 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Fri, 29 Sep 2023 08:37:57 +0200
Subject: [PATCH 1/3] Avoid need for SLocEntryLoaded BitVector

The BitVector is currently used to keep track of which entries of
LoadedSLocEntryTable have been loaded. Such information can be stored in
the SLocEntry itself. Moreover, thanks to the fact that
LoadedSLocEntryTable is now a PagedVector, we can simply consider
elements of unmaterialised pages as not loaded.

This trades reducing the number of OffsetBits by one for reducing memory
usage of the SourceManager by LoadedSLocEntryTable.size()/8 bytes.
---
 clang/include/clang/Basic/SourceManager.h | 16 ++++++--------
 clang/lib/Basic/SourceManager.cpp         | 27 ++++++++++++++---------
 llvm/include/llvm/ADT/PagedVector.h       |  6 +++++
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index c1b24eec2759c71..d450280303d6785 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -474,9 +474,10 @@ static_assert(sizeof(FileInfo) <= sizeof(ExpansionInfo),
 /// SourceManager keeps an array of these objects, and they are uniquely
 /// identified by the FileID datatype.
 class SLocEntry {
-  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 1;
+  static constexpr int OffsetBits = 8 * sizeof(SourceLocation::UIntTy) - 2;
   SourceLocation::UIntTy Offset : OffsetBits;
   SourceLocation::UIntTy IsExpansion : 1;
+  SourceLocation::UIntTy Loaded : 1;
   union {
     FileInfo File;
     ExpansionInfo Expansion;
@@ -489,6 +490,8 @@ class SLocEntry {
 
   bool isExpansion() const { return IsExpansion; }
   bool isFile() const { return !isExpansion(); }
+  [[nodiscard]] bool isLoaded() const { return Loaded; }
+  void setLoaded(bool Value) { Loaded = Value; }
 
   const FileInfo &getFile() const {
     assert(isFile() && "Not a file SLocEntry!");
@@ -716,13 +719,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// The highest possible offset is 2^31-1 (2^63-1 for 64-bit source
   /// locations), so CurrentLoadedOffset starts at 2^31 (2^63 resp.).
   static const SourceLocation::UIntTy MaxLoadedOffset =
-      1ULL << (8 * sizeof(SourceLocation::UIntTy) - 1);
-
-  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
-  /// have already been loaded from the external source.
-  ///
-  /// Same indexing as LoadedSLocEntryTable.
-  llvm::BitVector SLocEntryLoaded;
+      1ULL << (8 * sizeof(SourceLocation::UIntTy) - 2);
 
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
@@ -1710,7 +1707,8 @@ class SourceManager : public RefCountedBase<SourceManager> {
   const SrcMgr::SLocEntry &getLoadedSLocEntry(unsigned Index,
                                               bool *Invalid = nullptr) const {
     assert(Index < LoadedSLocEntryTable.size() && "Invalid index");
-    if (SLocEntryLoaded[Index])
+    if (LoadedSLocEntryTable.isMaterialized(Index) &&
+        LoadedSLocEntryTable[Index].isLoaded())
       return LoadedSLocEntryTable[Index];
     return loadSLocEntry(Index, Invalid);
   }
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 3066cc53dbfd878..20e3f1252ddf077 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -336,7 +336,6 @@ void SourceManager::clearIDTables() {
   MainFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
-  SLocEntryLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -373,7 +372,8 @@ void SourceManager::initializeForReplay(const SourceManager &Old) {
 
   // Ensure all SLocEntries are loaded from the external source.
   for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I)
-    if (!Old.SLocEntryLoaded[I])
+    if (!Old.LoadedSLocEntryTable.isMaterialized(I) ||
+        !Old.LoadedSLocEntryTable[I].isLoaded())
       Old.loadSLocEntry(I, nullptr);
 
   // Inherit any content cache data from the old source manager.
@@ -430,12 +430,14 @@ ContentCache &SourceManager::createMemBufferContentCache(
 
 const SrcMgr::SLocEntry &SourceManager::loadSLocEntry(unsigned Index,
                                                       bool *Invalid) const {
-  assert(!SLocEntryLoaded[Index]);
+  assert(!LoadedSLocEntryTable.isMaterialized(Index) ||
+         !LoadedSLocEntryTable[Index].isLoaded());
   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast<int>(Index) + 2))) {
     if (Invalid)
       *Invalid = true;
     // If the file of the SLocEntry changed we could still have loaded it.
-    if (!SLocEntryLoaded[Index]) {
+    if (!LoadedSLocEntryTable.isMaterialized(Index) ||
+        !LoadedSLocEntryTable[Index].isLoaded()) {
       // Try to recover; create a SLocEntry so the rest of clang can handle it.
       if (!FakeSLocEntryForRecovery)
         FakeSLocEntryForRecovery = std::make_unique<SLocEntry>(SLocEntry::get(
@@ -458,7 +460,6 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
     return std::make_pair(0, 0);
   }
   LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
-  SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
   CurrentLoadedOffset -= TotalSize;
   int ID = LoadedSLocEntryTable.size();
   return std::make_pair(-ID - 1, CurrentLoadedOffset);
@@ -604,10 +605,12 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
     assert(LoadedID != -1 && "Loading sentinel FileID");
     unsigned Index = unsigned(-LoadedID) - 2;
     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
-    assert(!SLocEntryLoaded[Index] && "FileID already loaded");
+    assert((!LoadedSLocEntryTable.isMaterialized(Index) ||
+            !LoadedSLocEntryTable[Index].isLoaded()) &&
+           "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(
         LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
-    SLocEntryLoaded[Index] = true;
+    LoadedSLocEntryTable[Index].setLoaded(true);
     return FileID::get(LoadedID);
   }
   unsigned FileSize = File.getSize();
@@ -665,9 +668,11 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
     assert(LoadedID != -1 && "Loading sentinel FileID");
     unsigned Index = unsigned(-LoadedID) - 2;
     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
-    assert(!SLocEntryLoaded[Index] && "FileID already loaded");
+    assert((!LoadedSLocEntryTable.isMaterialized(Index) ||
+            !LoadedSLocEntryTable[Index].isLoaded()) &&
+           "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
-    SLocEntryLoaded[Index] = true;
+    LoadedSLocEntryTable[Index].setLoaded(true);
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
@@ -2223,7 +2228,8 @@ LLVM_DUMP_METHOD void SourceManager::dump() const {
   std::optional<SourceLocation::UIntTy> NextStart;
   for (unsigned Index = 0; Index != LoadedSLocEntryTable.size(); ++Index) {
     int ID = -(int)Index - 2;
-    if (SLocEntryLoaded[Index]) {
+    if (LoadedSLocEntryTable.isMaterialized(Index) &&
+        LoadedSLocEntryTable[Index].isLoaded()) {
       DumpSLocEntry(ID, LoadedSLocEntryTable[Index], NextStart);
       NextStart = LoadedSLocEntryTable[Index].getOffset();
     } else {
@@ -2346,7 +2352,6 @@ size_t SourceManager::getDataStructureSizes() const {
   size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
                 llvm::capacity_in_bytes(LocalSLocEntryTable) +
                 llvm::capacity_in_bytes(LoadedSLocEntryTable) +
-                llvm::capacity_in_bytes(SLocEntryLoaded) +
                 llvm::capacity_in_bytes(FileInfos);
 
   if (OverriddenFilesInfo)
diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index 667bece6d718385..dfae3a667360f3a 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -103,6 +103,12 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  [[nodiscard]] bool isMaterialized(size_t Index) const {
+    assert(Index < Size);
+    assert(Index / PageSize < PageToDataPtrs.size());
+    return PageToDataPtrs[Index / PageSize] != nullptr;
+  }
+
   /// Resize the vector. Notice that the constructor of the elements will not
   /// be invoked until an element of a given page is accessed, at which point
   /// all the elements of the page will be constructed.

>From cb27b8984ff6b388f5a84700532095ea7b59d873 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Mon, 2 Oct 2023 16:58:00 +0200
Subject: [PATCH 2/3] Update llvm/include/llvm/ADT/PagedVector.h

Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
---
 llvm/include/llvm/ADT/PagedVector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index dfae3a667360f3a..eb6dd090e437e4c 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -106,7 +106,7 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   [[nodiscard]] bool isMaterialized(size_t Index) const {
     assert(Index < Size);
     assert(Index / PageSize < PageToDataPtrs.size());
-    return PageToDataPtrs[Index / PageSize] != nullptr;
+    return PageToDataPtrs[Index / PageSize];
   }
 
   /// Resize the vector. Notice that the constructor of the elements will not

>From 578b13dbfcef664a55d1f8d24284f39f81453e55 Mon Sep 17 00:00:00 2001
From: Giulio Eulisse <10544+ktf at users.noreply.github.com>
Date: Mon, 2 Oct 2023 17:00:29 +0200
Subject: [PATCH 3/3] Update llvm/include/llvm/ADT/PagedVector.h

---
 llvm/include/llvm/ADT/PagedVector.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/include/llvm/ADT/PagedVector.h b/llvm/include/llvm/ADT/PagedVector.h
index eb6dd090e437e4c..021081ee2b6d078 100644
--- a/llvm/include/llvm/ADT/PagedVector.h
+++ b/llvm/include/llvm/ADT/PagedVector.h
@@ -103,6 +103,8 @@ template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
   /// Return the size of the vector.
   [[nodiscard]] size_t size() const { return Size; }
 
+  /// @return true in case the element at index @a Index belongs to a page which
+  /// was already materialised.
   [[nodiscard]] bool isMaterialized(size_t Index) const {
     assert(Index < Size);
     assert(Index / PageSize < PageToDataPtrs.size());



More information about the llvm-commits mailing list