[clang] [clang][modules] Remove preloaded SLocEntries from PCM files (PR #66962)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 25 12:54:43 PDT 2023


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/66962

>From 1e36c68229a98ffbdb61ea652077bc3356fc177f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 20 Sep 2023 16:50:48 -0700
Subject: [PATCH 1/3] [clang][modules] Remove preloaded SLocEntries from PCM
 files

This commit removes the list of SLocEntry offsets to preload eagerly from PCM files. Commit introducing this functionality (258ae54a) doesn't clarify why this would be more performant than the lazy approach used regularly.

Currently, the only SLocEntry the reader is supposed to preload is the predefines buffer, but in my experience, it's not actually referenced in most modules, so the time spent deserializing its SLocEntry is wasted. This is especially noticeable in the dependency scanner, where this change brings 4.56% speedup on my benchmark.
---
 .../include/clang/Serialization/ASTBitCodes.h | 10 ++------
 .../include/clang/Serialization/ModuleFile.h  |  3 ---
 clang/lib/Serialization/ASTReader.cpp         | 23 -------------------
 clang/lib/Serialization/ASTWriter.cpp         |  8 -------
 4 files changed, 2 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 9e115f2a5cce3f9..6cbaff71cc33960 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 29;
 /// for the previous version could still support reading the new
 /// version by ignoring new kinds of subblocks), this number
 /// should be increased.
-const unsigned VERSION_MINOR = 0;
+const unsigned VERSION_MINOR = 1;
 
 /// An ID number that refers to an identifier in an AST file.
 ///
@@ -524,13 +524,7 @@ enum ASTRecordTypes {
   /// of source-location information.
   SOURCE_LOCATION_OFFSETS = 14,
 
-  /// Record code for the set of source location entries
-  /// that need to be preloaded by the AST reader.
-  ///
-  /// This set contains the source location entry for the
-  /// predefines buffer and for any file entries that need to be
-  /// preloaded.
-  SOURCE_LOCATION_PRELOADS = 15,
+  /// ID 15 used to be for source location entry preloads.
 
   /// Record code for the set of ext_vector type names.
   EXT_VECTOR_DECLS = 16,
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 0af5cae6aebc375..48be8676cc26a4c 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -292,9 +292,6 @@ class ModuleFile {
   /// AST file.
   const uint32_t *SLocEntryOffsets = nullptr;
 
-  /// SLocEntries that we're going to preload.
-  SmallVector<uint64_t, 4> PreloadSLocEntries;
-
   /// Remapping table for source locations in this module.
   ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
       SLocRemap;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..e796617455ac390 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3226,7 +3226,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       case SOURCE_LOCATION_OFFSETS:
       case MODULE_OFFSET_MAP:
       case SOURCE_MANAGER_LINE_TABLE:
-      case SOURCE_LOCATION_PRELOADS:
       case PPD_ENTITIES_OFFSETS:
       case HEADER_SEARCH_TABLE:
       case IMPORTED_MODULES:
@@ -3576,18 +3575,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       ParseLineTable(F, Record);
       break;
 
-    case SOURCE_LOCATION_PRELOADS: {
-      // Need to transform from the local view (1-based IDs) to the global view,
-      // which is based off F.SLocEntryBaseID.
-      if (!F.PreloadSLocEntries.empty())
-        return llvm::createStringError(
-            std::errc::illegal_byte_sequence,
-            "Multiple SOURCE_LOCATION_PRELOADS records in AST file");
-
-      F.PreloadSLocEntries.swap(Record);
-      break;
-    }
-
     case EXT_VECTOR_DECLS:
       for (unsigned I = 0, N = Record.size(); I != N; ++I)
         ExtVectorDecls.push_back(getGlobalDeclID(F, Record[I]));
@@ -4417,16 +4404,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleKind Type,
   for (ImportedModule &M : Loaded) {
     ModuleFile &F = *M.Mod;
 
-    // Preload SLocEntries.
-    for (unsigned I = 0, N = F.PreloadSLocEntries.size(); I != N; ++I) {
-      int Index = int(F.PreloadSLocEntries[I] - 1) + F.SLocEntryBaseID;
-      // Load it through the SourceManager and don't call ReadSLocEntry()
-      // directly because the entry may have already been loaded in which case
-      // calling ReadSLocEntry() directly would trigger an assertion in
-      // SourceManager.
-      SourceMgr.getLoadedSLocEntryByID(Index);
-    }
-
     // Map the original source file ID into the ID space of the current
     // compilation.
     if (F.OriginalSourceFileID.isValid())
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 65bee806d2c5571..3392243d7ac4ba7 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -838,7 +838,6 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(METHOD_POOL);
   RECORD(PP_COUNTER_VALUE);
   RECORD(SOURCE_LOCATION_OFFSETS);
-  RECORD(SOURCE_LOCATION_PRELOADS);
   RECORD(EXT_VECTOR_DECLS);
   RECORD(UNUSED_FILESCOPED_DECLS);
   RECORD(PPD_ENTITIES_OFFSETS);
@@ -2137,7 +2136,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
   // entry, which is always the same dummy entry.
   std::vector<uint32_t> SLocEntryOffsets;
   uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo();
-  RecordData PreloadSLocs;
   SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1);
   for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size();
        I != N; ++I) {
@@ -2213,9 +2211,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
         Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
                                   StringRef(Name.data(), Name.size() + 1));
         EmitBlob = true;
-
-        if (Name == "<built-in>")
-          PreloadSLocs.push_back(SLocEntryOffsets.size());
       }
 
       if (EmitBlob) {
@@ -2277,9 +2272,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
     Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
                               bytes(SLocEntryOffsets));
   }
-  // Write the source location entry preloads array, telling the AST
-  // reader which source locations entries it should load eagerly.
-  Stream.EmitRecord(SOURCE_LOCATION_PRELOADS, PreloadSLocs);
 
   // Write the line table. It depends on remapping working, so it must come
   // after the source location offsets.

>From f133797d2a58cf67736d294f7ce19cf65c5ae01c Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 25 Sep 2023 12:51:15 -0700
Subject: [PATCH 2/3] [clang][modules] Explicitly mark built-ins buffers as not
 part of any TU

---
 clang/lib/Basic/SourceManager.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..5902adff0ebf2c3 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2082,6 +2082,11 @@ std::pair<bool, bool> SourceManager::isInTheSameTranslationUnit(
   if (LOffs.first == ROffs.first)
     return std::make_pair(true, LOffs.second < ROffs.second);
 
+  // Built-ins are not considered part of any TU.
+  if (getBufferOrFake(LOffs.first).getBufferIdentifier() == "<built-in>" ||
+      getBufferOrFake(ROffs.first).getBufferIdentifier() == "<built-in>")
+    return std::make_pair(false, LOffs.second < ROffs.second);
+
   // If we are comparing a source location with multiple locations in the same
   // file, we get a big win by caching the result.
   InBeforeInTUCacheEntry &IsBeforeInTUCache =

>From 51894b7b6292f1d7d19b5e01e12cf67099b22e4b Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 25 Sep 2023 12:54:30 -0700
Subject: [PATCH 3/3] [clang][modules] Downgrade doc comment to regular line
 comment

---
 clang/include/clang/Serialization/ASTBitCodes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 6cbaff71cc33960..85f49e21b2e2ec1 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -524,7 +524,7 @@ enum ASTRecordTypes {
   /// of source-location information.
   SOURCE_LOCATION_OFFSETS = 14,
 
-  /// ID 15 used to be for source location entry preloads.
+  // ID 15 used to be for source location entry preloads.
 
   /// Record code for the set of ext_vector type names.
   EXT_VECTOR_DECLS = 16,



More information about the cfe-commits mailing list