[clang] [Modules] No transitive source location change (PR #86912)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 15 19:33:07 PDT 2024


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/86912

>From 8e07cbdd0f348484d532d7827e4b4a7888e70978 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Mon, 18 Mar 2024 08:36:55 +0800
Subject: [PATCH 1/3] [Modules] No transitive source location change

---
 clang/include/clang/Basic/SourceLocation.h    |  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 ++++++------
 clang/include/clang/Serialization/ASTReader.h | 54 +++++++-----
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  |  4 -
 .../Serialization/SourceLocationEncoding.h    | 88 +++++++++++++------
 clang/lib/Frontend/ASTUnit.cpp                |  2 -
 clang/lib/Serialization/ASTReader.cpp         | 84 +++++++-----------
 clang/lib/Serialization/ASTReaderDecl.cpp     |  2 +-
 clang/lib/Serialization/ASTWriter.cpp         | 41 +++++++--
 clang/lib/Serialization/ASTWriterDecl.cpp     |  8 +-
 clang/lib/Serialization/ModuleFile.cpp        |  1 -
 .../no-transitive-source-location-change.cppm | 69 +++++++++++++++
 clang/test/Modules/pr61067.cppm               | 25 ------
 .../SourceLocationEncodingTest.cpp            | 12 +--
 15 files changed, 275 insertions(+), 176 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait<SourceLocation, void>;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index f762116fea956c..f94b32d762effc 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -22,6 +22,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include <cassert>
@@ -175,45 +176,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-      : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-        BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-    return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+      : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-    return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-      : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+      : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-    return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-    return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -239,8 +233,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -248,17 +244,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
-             uint64_t DeclTypesBlockStartOffset) {
-    setLocation(Loc);
+  DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
+             uint64_t DeclTypesBlockStartOffset)
+      : RawLoc(RawLoc) {
     setBitOffset(BitOffset, DeclTypesBlockStartOffset);
   }
 
-  void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
+  void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }
 
-  SourceLocation getLocation() const {
-    return SourceLocation::getFromRawEncoding(Loc);
-  }
+  RawLocEncoding getRawLoc() const { return RawLoc; }
 
   void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
     BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 370d8037a4da17..017c6b76a91495 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -696,7 +696,7 @@ class ASTReader
   /// Mapping from global submodule IDs to the module file in which the
   /// submodule resides along with the offset that should be added to the
   /// global submodule ID to produce a local ID.
-  GlobalSubmoduleMapType GlobalSubmoduleMap;
+  mutable GlobalSubmoduleMapType GlobalSubmoduleMap;
 
   /// A set of hidden declarations.
   using HiddenNames = SmallVector<Decl *, 2>;
@@ -942,6 +942,12 @@ class ASTReader
   /// Sema tracks these to emit deferred diags.
   llvm::SmallSetVector<serialization::DeclID, 4> DeclsToCheckForDeferredDiags;
 
+  /// The module files imported by different module files. Indirectly imported
+  /// module files are included too. The information comes from
+  /// ReadModuleOffsetMap(ModuleFile&).
+  mutable llvm::DenseMap<ModuleFile *, llvm::SmallVector<ModuleFile *>>
+      ImportedModuleFiles;
+
 private:
   struct ImportedSubmodule {
     serialization::SubmoduleID ID;
@@ -1761,6 +1767,7 @@ class ASTReader
 
   /// Retrieve the module manager.
   ModuleManager &getModuleManager() { return ModuleMgr; }
+  const ModuleManager &getModuleManager() const { return ModuleMgr; }
 
   /// Retrieve the preprocessor.
   Preprocessor &getPreprocessor() const { return PP; }
@@ -2170,8 +2177,8 @@ class ASTReader
 
   /// Retrieve the global submodule ID given a module and its local ID
   /// number.
-  serialization::SubmoduleID
-  getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
+  serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
+                                                  unsigned LocalID) const;
 
   /// Retrieve the submodule that corresponds to a global submodule ID.
   ///
@@ -2184,7 +2191,7 @@ class ASTReader
 
   /// Retrieve the module file with a given local ID within the specified
   /// ModuleFile.
-  ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
+  ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;
 
   /// Get an ID for the given module file.
   unsigned getModuleFileID(ModuleFile *M);
@@ -2220,40 +2227,47 @@ class ASTReader
     return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-                                                LocSeq *Seq = nullptr) const {
+  std::pair<SourceLocation, unsigned>
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+                                 LocSeq *Seq = nullptr) const {
     return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
-                                    SourceLocation::UIntTy Raw,
-                                    LocSeq *Seq = nullptr) const {
-    SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-    return TranslateSourceLocation(ModuleFile, Loc);
+  SourceLocation ReadRawSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
+                                       LocSeq *Seq = nullptr) const {
+    if (!MF.ModuleOffsetMap.empty())
+      ReadModuleOffsetMap(MF);
+
+    auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+    ModuleFile *ModuleFileHomingLoc =
+        ModuleFileIndex ? ImportedModuleFiles[&MF][ModuleFileIndex - 1] : &MF;
+    return TranslateSourceLocation(*ModuleFileHomingLoc, Loc);
   }
 
   /// Translate a source location from another module file's source
   /// location space into ours.
   SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
                                          SourceLocation Loc) const {
-    if (!ModuleFile.ModuleOffsetMap.empty())
-      ReadModuleOffsetMap(ModuleFile);
-    assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
-               ModuleFile.SLocRemap.end() &&
-           "Cannot find offset to remap.");
-    SourceLocation::IntTy Remap =
-        ModuleFile.SLocRemap.find(Loc.getOffset())->second;
-    return Loc.getLocWithOffset(Remap);
+    if (Loc.isInvalid())
+      return Loc;
+
+    // It implies that the Loc is already translated.
+    if (SourceMgr.isLoadedSourceLocation(Loc))
+      return Loc;
+
+    return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
   }
 
   /// Read a source location.
   SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
                                     const RecordDataImpl &Record, unsigned &Idx,
                                     LocSeq *Seq = nullptr) {
-    return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
+    return ReadRawSourceLocation(ModuleFile, Record[Idx++], Seq);
   }
 
   /// Read a FileID.
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 214eb3601148b0..b1b3359050c1c0 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -649,6 +649,10 @@ class ASTWriter : public ASTDeserializationListener,
   void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
                          LocSeq *Seq = nullptr);
 
+  /// Return the raw encodings for source locations.
+  SourceLocationEncoding::RawLocEncoding
+  getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
+
   /// Emit a source range.
   void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
                       LocSeq *Seq = nullptr);
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index bc0aa89966c2b4..ea24b44e5e411b 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -295,10 +295,6 @@ class ModuleFile {
   /// AST file.
   const uint32_t *SLocEntryOffsets = nullptr;
 
-  /// Remapping table for source locations in this module.
-  ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
-      SLocRemap;
-
   // === Identifiers ===
 
   /// The number of identifiers in this AST file.
diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h
index 9bb0dbe2e4d6f6..c7843a0909c095 100644
--- a/clang/include/clang/Serialization/SourceLocationEncoding.h
+++ b/clang/include/clang/Serialization/SourceLocationEncoding.h
@@ -6,23 +6,27 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Source locations are stored pervasively in the AST, making up a third of
-// the size of typical serialized files. Storing them efficiently is important.
+// We wish to encode the SourceLocation from other module file not dependent
+// on the other module file. So that the source location changes from other
+// module file may not affect the contents of the current module file. Then the
+// users don't need to recompile the whole project due to a new line in a module
+// unit in the root of the dependency graph.
 //
-// We use integers optimized by VBR-encoding, because:
-//  - when abbreviations cannot be used, VBR6 encoding is our only choice
-//  - in the worst case a SourceLocation can be ~any 32-bit number, but in
-//    practice they are highly predictable
+// To achieve this, we need to encode the index of the module file into the
+// encoding of the source location. The encoding of the source location may be:
 //
-// We encode the integer so that likely values encode as small numbers that
-// turn into few VBR chunks:
-//  - the invalid sentinel location is a very common value: it encodes as 0
-//  - the "macro or not" bit is stored at the bottom of the integer
-//    (rather than at the top, as in memory), so macro locations can have
-//    small representations.
-//  - related locations (e.g. of a left and right paren pair) are usually
-//    similar, so when encoding a sequence of locations we store only
-//    differences between successive elements.
+//      |-----------------------|-----------------------|
+//      |          A            |         B         | C |
+//
+//  * A: 32 bit. The index of the module file in the module manager + 1. The +1
+//  here is necessary since we wish 0 stands for the current module file.
+//  * B: 31 bit. The offset of the source location to the module file containing
+//  it.
+//  * C: The macro bit. We rotate it to the lowest bit so that we can save some
+//  space in case the index of the module file is 0.
+//
+// Specially, if the index of the module file is 0, we allow to encode a sequence
+// of locations we store only differences between successive elements.
 //
 //===----------------------------------------------------------------------===//
 
@@ -52,9 +56,13 @@ class SourceLocationEncoding {
   friend SourceLocationSequence;
 
 public:
-  static uint64_t encode(SourceLocation Loc,
-                         SourceLocationSequence * = nullptr);
-  static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
+  using RawLocEncoding = uint64_t;
+
+  static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
+                               unsigned BaseModuleFileIndex,
+                               SourceLocationSequence * = nullptr);
+  static std::pair<SourceLocation, unsigned>
+  decode(RawLocEncoding, SourceLocationSequence * = nullptr);
 };
 
 /// Serialized encoding of a sequence of SourceLocations.
@@ -149,14 +157,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return &Seq; }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-                                               SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+                               unsigned BaseModuleFileIndex,
+                               SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+    return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+    return 0;
+  
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+  assert(Encoded < ((RawLocEncoding)1 << 32));
+
+  assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
+  Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
+  return Encoded;
 }
-inline SourceLocation
-SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
-  return Seq ? Seq->decode(Encoded)
-             : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
+inline std::pair<SourceLocation, unsigned>
+SourceLocationEncoding::decode(RawLocEncoding Encoded,
+                               SourceLocationSequence *Seq) {
+  unsigned ModuleFileIndex = Encoded >> 32;
+
+  if (!ModuleFileIndex)
+    return {Seq ? Seq->decode(Encoded)
+             : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
+            ModuleFileIndex};
+
+  Encoded &= ((RawLocEncoding)1 << 33) - 1;
+  SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
+
+  return {Loc, ModuleFileIndex};
 }
 
 } // namespace clang
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 3610a08831e79a..1c655260b09eb5 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -2373,8 +2373,6 @@ bool ASTUnit::serialize(raw_ostream &OS) {
   return serializeUnit(Writer, Buffer, getSema(), OS);
 }
 
-using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;
-
 void ASTUnit::TranslateStoredDiagnostics(
                           FileManager &FileMgr,
                           SourceManager &SrcMgr,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index fa5bb9f2d5435a..0aa4b05ffefd29 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1647,7 +1647,7 @@ bool ASTReader::ReadSLocEntry(int ID) {
     if (!File)
       return true;
 
-    SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
+    SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]);
     if (IncludeLoc.isInvalid() && F->Kind != MK_MainFile) {
       // This is the module's main file.
       IncludeLoc = getImportLocation(F);
@@ -1689,7 +1689,7 @@ bool ASTReader::ReadSLocEntry(int ID) {
     unsigned Offset = Record[0];
     SrcMgr::CharacteristicKind
       FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
-    SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
+    SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]);
     if (IncludeLoc.isInvalid() && F->isModule()) {
       IncludeLoc = getImportLocation(F);
     }
@@ -1709,9 +1709,9 @@ bool ASTReader::ReadSLocEntry(int ID) {
 
   case SM_SLOC_EXPANSION_ENTRY: {
     LocSeq::State Seq;
-    SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
-    SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
-    SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
+    SourceLocation SpellingLoc = ReadRawSourceLocation(*F, Record[1], Seq);
+    SourceLocation ExpansionBegin = ReadRawSourceLocation(*F, Record[2], Seq);
+    SourceLocation ExpansionEnd = ReadRawSourceLocation(*F, Record[3], Seq);
     SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
                                  Record[5], Record[4], ID,
                                  BaseOffset + Record[0]);
@@ -3040,8 +3040,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
         // The import location will be the local one for now; we will adjust
         // all import locations of module imports after the global source
         // location info are setup, in ReadAST.
-        SourceLocation ImportLoc =
+        auto [ImportLoc, ImportModuleFileIndex] =
             ReadUntranslatedSourceLocation(Record[Idx++]);
+        // The import location must belong to the current module file itself.
+        assert(ImportModuleFileIndex == 0);
         off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0;
         time_t StoredModTime =
             !IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0;
@@ -3660,13 +3662,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
           std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset
                            - SLocSpaceSize,&F));
 
-      // Initialize the remapping table.
-      // Invalid stays invalid.
-      F.SLocRemap.insertOrReplace(std::make_pair(0U, 0));
-      // This module. Base was 2 when being compiled.
-      F.SLocRemap.insertOrReplace(std::make_pair(
-          2U, static_cast<SourceLocation::IntTy>(F.SLocEntryBaseOffset - 2)));
-
       TotalNumSLocEntries += F.LocalNumSLocEntries;
       break;
     }
@@ -3943,7 +3938,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       if (Record.size() != 1)
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma optimize record");
-      OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
+      OptimizeOffPragmaLocation = ReadRawSourceLocation(F, Record[0]);
       break;
 
     case MSSTRUCT_PRAGMA_OPTIONS:
@@ -3959,7 +3954,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
             std::errc::illegal_byte_sequence,
             "invalid pragma pointers to members record");
       PragmaMSPointersToMembersState = Record[0];
-      PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
+      PointersToMembersPragmaLocation = ReadRawSourceLocation(F, Record[1]);
       break;
 
     case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES:
@@ -3980,7 +3975,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma pack record");
       PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]);
-      PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]);
+      PragmaAlignPackCurrentLocation = ReadRawSourceLocation(F, Record[1]);
       unsigned NumStackEntries = Record[2];
       unsigned Idx = 3;
       // Reset the stack when importing a new module.
@@ -3988,8 +3983,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       for (unsigned I = 0; I < NumStackEntries; ++I) {
         PragmaAlignPackStackEntry Entry;
         Entry.Value = ReadAlignPackInfo(Record[Idx++]);
-        Entry.Location = ReadSourceLocation(F, Record[Idx++]);
-        Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
+        Entry.Location = ReadRawSourceLocation(F, Record[Idx++]);
+        Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]);
         PragmaAlignPackStrings.push_back(ReadString(Record, Idx));
         Entry.SlotLabel = PragmaAlignPackStrings.back();
         PragmaAlignPackStack.push_back(Entry);
@@ -4002,7 +3997,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma float control record");
       FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]);
-      FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]);
+      FpPragmaCurrentLocation = ReadRawSourceLocation(F, Record[1]);
       unsigned NumStackEntries = Record[2];
       unsigned Idx = 3;
       // Reset the stack when importing a new module.
@@ -4010,8 +4005,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       for (unsigned I = 0; I < NumStackEntries; ++I) {
         FpPragmaStackEntry Entry;
         Entry.Value = FPOptionsOverride::getFromOpaqueInt(Record[Idx++]);
-        Entry.Location = ReadSourceLocation(F, Record[Idx++]);
-        Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
+        Entry.Location = ReadRawSourceLocation(F, Record[Idx++]);
+        Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]);
         FpPragmaStrings.push_back(ReadString(Record, Idx));
         Entry.SlotLabel = FpPragmaStrings.back();
         FpPragmaStack.push_back(Entry);
@@ -4035,18 +4030,7 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size();
   F.ModuleOffsetMap = StringRef();
 
-  // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders.
-  if (F.SLocRemap.find(0) == F.SLocRemap.end()) {
-    F.SLocRemap.insert(std::make_pair(0U, 0));
-    F.SLocRemap.insert(std::make_pair(2U, 1));
-  }
-
-  // Continuous range maps we may be updating in our module.
-  using SLocRemapBuilder =
-      ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy,
-                         2>::Builder;
   using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
-  SLocRemapBuilder SLocRemap(F.SLocRemap);
   RemapBuilder IdentifierRemap(F.IdentifierRemap);
   RemapBuilder MacroRemap(F.MacroRemap);
   RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
@@ -4055,6 +4039,9 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   RemapBuilder DeclRemap(F.DeclRemap);
   RemapBuilder TypeRemap(F.TypeRemap);
 
+  auto &ImportedModuleVector = ImportedModuleFiles[&F];
+  assert(ImportedModuleVector.empty());
+
   while (Data < DataEnd) {
     // FIXME: Looking up dependency modules by filename is horrible. Let's
     // start fixing this with prebuilt, explicit and implicit modules and see
@@ -4078,8 +4065,8 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
       return;
     }
 
-    SourceLocation::UIntTy SLocOffset =
-        endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Data);
+    ImportedModuleVector.push_back(OM);
+
     uint32_t IdentifierIDOffset =
         endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Data);
     uint32_t MacroIDOffset =
@@ -4103,13 +4090,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
                                     static_cast<int>(BaseOffset - Offset)));
     };
 
-    constexpr SourceLocation::UIntTy SLocNone =
-        std::numeric_limits<SourceLocation::UIntTy>::max();
-    if (SLocOffset != SLocNone)
-      SLocRemap.insert(std::make_pair(
-          SLocOffset, static_cast<SourceLocation::IntTy>(
-                          OM->SLocEntryBaseOffset - SLocOffset)));
-
     mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
     mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
     mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
@@ -5770,7 +5750,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
       SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
-      SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
+      SourceLocation DefinitionLoc = ReadRawSourceLocation(F, Record[Idx++]);
       bool IsFramework = Record[Idx++];
       bool IsExplicit = Record[Idx++];
       bool IsSystem = Record[Idx++];
@@ -6245,8 +6225,8 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
   unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
   assert(LocalIndex < M->NumPreprocessedSkippedRanges);
   PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
-  SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()),
-                    TranslateSourceLocation(*M, RawRange.getEnd()));
+  SourceRange Range(ReadRawSourceLocation(*M, RawRange.getBegin()),
+                    ReadRawSourceLocation(*M, RawRange.getEnd()));
   assert(Range.isValid());
   return Range;
 }
@@ -6282,8 +6262,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
     return nullptr;
 
   // Read the record.
-  SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()),
-                    TranslateSourceLocation(M, PPOffs.getEnd()));
+  SourceRange Range(ReadRawSourceLocation(M, PPOffs.getBegin()),
+                    ReadRawSourceLocation(M, PPOffs.getEnd()));
   PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
   StringRef Blob;
   RecordData Record;
@@ -6395,7 +6375,7 @@ struct PPEntityComp {
   }
 
   SourceLocation getLoc(const PPEntityOffset &PPE) const {
-    return Reader.TranslateSourceLocation(M, PPE.getBegin());
+    return Reader.ReadRawSourceLocation(M, PPE.getBegin());
   }
 };
 
@@ -6439,7 +6419,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
       PPI = First;
       std::advance(PPI, Half);
       if (SourceMgr.isBeforeInTranslationUnit(
-              TranslateSourceLocation(M, PPI->getEnd()), Loc)) {
+              ReadRawSourceLocation(M, PPI->getEnd()), Loc)) {
         First = PPI;
         ++First;
         Count = Count - Half - 1;
@@ -6480,7 +6460,7 @@ std::optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
   unsigned LocalIndex = PPInfo.second;
   const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
 
-  SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin());
+  SourceLocation Loc = ReadRawSourceLocation(M, PPOffs.getBegin());
   if (Loc.isInvalid())
     return false;
 
@@ -6646,7 +6626,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
     // Read the final state.
     assert(Idx < Record.size() &&
            "Invalid data, missing final pragma diagnostic state");
-    SourceLocation CurStateLoc = ReadSourceLocation(F, Record[Idx++]);
+    SourceLocation CurStateLoc = ReadRawSourceLocation(F, Record[Idx++]);
     auto *CurState = ReadDiagState(*FirstState, false);
 
     if (!F.isModule()) {
@@ -8950,7 +8930,7 @@ MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) {
 }
 
 serialization::SubmoduleID
-ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) {
+ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const {
   if (LocalID < NUM_PREDEF_SUBMODULE_IDS)
     return LocalID;
 
@@ -8983,7 +8963,7 @@ Module *ASTReader::getModule(unsigned ID) {
   return getSubmodule(ID);
 }
 
-ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) {
+ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) const {
   if (ID & 1) {
     // It's a module, look it up by submodule ID.
     auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(M, ID >> 1));
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index a22f760408c634..f3192b3d67508c 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3246,7 +3246,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
   ModuleFile *M = I->second;
   const DeclOffset &DOffs =
       M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
-  Loc = TranslateSourceLocation(*M, DOffs.getLocation());
+  Loc = ReadRawSourceLocation(*M, DOffs.getRawLoc());
   return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
 }
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index baf03f69d73065..2027867eaf78f9 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2663,8 +2663,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,
 
     uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase;
     assert((Offset >> 32) == 0 && "Preprocessed entity offset too large");
-    PreprocessedEntityOffsets.push_back(
-        PPEntityOffset(getAdjustedRange((*E)->getSourceRange()), Offset));
+    SourceRange R = getAdjustedRange((*E)->getSourceRange());
+    PreprocessedEntityOffsets.emplace_back(
+        getRawSourceLocationEncoding(R.getBegin()),
+        getRawSourceLocationEncoding(R.getEnd()), Offset);
 
     if (auto *MD = dyn_cast<MacroDefinitionRecord>(*E)) {
       // Record this macro definition's ID.
@@ -2731,7 +2733,9 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,
     std::vector<PPSkippedRange> SerializedSkippedRanges;
     SerializedSkippedRanges.reserve(SkippedRanges.size());
     for (auto const& Range : SkippedRanges)
-      SerializedSkippedRanges.emplace_back(Range);
+      SerializedSkippedRanges.emplace_back(
+          getRawSourceLocationEncoding(Range.getBegin()),
+          getRawSourceLocationEncoding(Range.getEnd()));
 
     using namespace llvm;
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
@@ -2897,8 +2901,8 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
       ParentID = SubmoduleIDs[Mod->Parent];
     }
 
-    uint64_t DefinitionLoc =
-        SourceLocationEncoding::encode(getAdjustedLocation(Mod->DefinitionLoc));
+    SourceLocationEncoding::RawLocEncoding DefinitionLoc =
+        getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc));
 
     // Emit the definition of the block.
     {
@@ -5088,7 +5092,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
         // These values should be unique within a chain, since they will be read
         // as keys into ContinuousRangeMaps.
-        writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries);
         writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
         writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
         writeBaseIDOrNone(M.BasePreprocessedEntityID,
@@ -5574,10 +5577,34 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) {
   Record.push_back(getAdjustedFileID(FID).getOpaqueValue());
 }
 
+SourceLocationEncoding::RawLocEncoding
+ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) {
+  unsigned BaseOffset = 0;
+  unsigned ModuleFileIndex = 0;
+
+  // See SourceLocationEncoding.h for the encoding details.
+  if (Context->getSourceManager().isLoadedSourceLocation(Loc) &&
+      Loc.isValid()) {
+    assert(getChain());
+    auto SLocMapI = getChain()->GlobalSLocOffsetMap.find(
+        SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
+    assert(SLocMapI != getChain()->GlobalSLocOffsetMap.end() &&
+           "Corrupted global sloc offset map");
+    ModuleFile *F = SLocMapI->second;
+    BaseOffset = F->SLocEntryBaseOffset - 2;
+    // 0 means the location is not loaded. So we need to add 1 to the index to
+    // make it clear.
+    ModuleFileIndex = F->Index + 1;
+    assert(&getChain()->getModuleManager()[F->Index] == F);
+  }
+
+  return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq);
+}
+
 void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
                                   SourceLocationSequence *Seq) {
   Loc = getAdjustedLocation(Loc);
-  Record.push_back(SourceLocationEncoding::encode(Loc, Seq));
+  Record.push_back(getRawSourceLocationEncoding(Loc, Seq));
 }
 
 void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record,
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 86f64bf2a24250..b113c433c4223c 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2777,14 +2777,16 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
 
   // Record the offset for this declaration
   SourceLocation Loc = D->getLocation();
+  SourceLocationEncoding::RawLocEncoding RawLoc =
+      getRawSourceLocationEncoding(getAdjustedLocation(Loc));
+
   unsigned Index = ID - FirstDeclID;
   if (DeclOffsets.size() == Index)
-    DeclOffsets.emplace_back(getAdjustedLocation(Loc), Offset,
-                             DeclTypesBlockStartOffset);
+    DeclOffsets.emplace_back(RawLoc, Offset, DeclTypesBlockStartOffset);
   else if (DeclOffsets.size() < Index) {
     // FIXME: Can/should this happen?
     DeclOffsets.resize(Index+1);
-    DeclOffsets[Index].setLocation(getAdjustedLocation(Loc));
+    DeclOffsets[Index].setRawLoc(RawLoc);
     DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset);
   } else {
     llvm_unreachable("declarations should be emitted in ID order");
diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp
index db896fd361159d..2c42d33a8f5dd3 100644
--- a/clang/lib/Serialization/ModuleFile.cpp
+++ b/clang/lib/Serialization/ModuleFile.cpp
@@ -59,7 +59,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() {
   // Remapping tables.
   llvm::errs() << "  Base source location offset: " << SLocEntryBaseOffset
                << '\n';
-  dumpLocalRemap("Source location offset local -> global map", SLocRemap);
 
   llvm::errs() << "  Base identifier ID: " << BaseIdentifierID << '\n'
                << "  Number of identifiers: " << LocalNumIdentifiers << '\n';
diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm
new file mode 100644
index 00000000000000..b876fbb6d9f0cf
--- /dev/null
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -0,0 +1,69 @@
+// Testing that adding a new line in a module interface unit won't cause the BMI
+// of consuming module unit changes.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm
+//
+// The BMI may not be the same since the source location differs.
+// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null
+//
+// The BMI of B shouldn't change since all the locations remain the same.
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/B.v1.pcm
+// RUN: diff %t/B.v1.pcm %t/B.pcm  &> /dev/null
+//
+// The BMI of C may change since the locations for instantiations changes.
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/C.pcm
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/C.v1.pcm
+// RUN: not diff %t/C.v1.pcm %t/C.pcm  &> /dev/null
+
+//--- A.cppm
+export module A;
+export template <class T>
+struct C {
+    T func() {
+        return T(43);
+    }
+};
+export int funcA() {
+    return 43;
+}
+
+//--- A.v1.cppm
+export module A;
+
+export template <class T>
+struct C {
+    T func() {
+        return T(43);
+    }
+};
+export int funcA() {
+    return 43;
+}
+
+//--- B.cppm
+export module B;
+import A;
+
+export int funcB() {
+    return funcA();
+}
+
+//--- C.cppm
+export module C;
+import A;
+export void testD() {
+    C<int> c;
+    c.func();
+}
diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm
index b7f9d22e253854..f853491fe76bf7 100644
--- a/clang/test/Modules/pr61067.cppm
+++ b/clang/test/Modules/pr61067.cppm
@@ -9,22 +9,6 @@
 // RUN:     -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm
 // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \
 // RUN:     -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \
-// RUN:     -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp
-
-// Test again with reduced BMI
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-//
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \
-// RUN:     -emit-reduced-module-interface -o %t/a.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \
-// RUN:     -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \
-// RUN:     -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \
-// RUN:     -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp
 
 //--- a.cppm
 export module a;
@@ -43,12 +27,3 @@ void b() {
 }
 
 // CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_(
-
-//--- c.cpp
-import a;
-
-int c() {
-    (void)(a() == a());
-}
-
-// CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_(
diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
index 141da4c27f8d0b..c80a8fd0e52b17 100644
--- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
+++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
@@ -23,13 +23,14 @@ using LocSeq = SourceLocationSequence;
 // Loc is the raw (in-memory) form of SourceLocation.
 void roundTrip(SourceLocation::UIntTy Loc,
                std::optional<uint64_t> ExpectedEncoded = std::nullopt) {
-  uint64_t ActualEncoded =
-      SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc));
+  uint64_t ActualEncoded = SourceLocationEncoding::encode(
+      SourceLocation::getFromRawEncoding(Loc), /*BaseOffset=*/0,
+      /*BaseModuleFileIndex=*/0);
   if (ExpectedEncoded) {
     ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc;
   }
   SourceLocation::UIntTy DecodedEncoded =
-      SourceLocationEncoding::decode(ActualEncoded).getRawEncoding();
+      SourceLocationEncoding::decode(ActualEncoded).first.getRawEncoding();
   ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded;
 }
 
@@ -41,7 +42,8 @@ void roundTrip(std::vector<SourceLocation::UIntTy> Locs,
     LocSeq::State Seq;
     for (auto L : Locs)
       ActualEncoded.push_back(SourceLocationEncoding::encode(
-          SourceLocation::getFromRawEncoding(L), Seq));
+          SourceLocation::getFromRawEncoding(L), /*BaseOffset=*/0,
+          /*BaseModuleFileIndex=*/0, Seq));
     if (!ExpectedEncoded.empty()) {
       ASSERT_EQ(ActualEncoded, ExpectedEncoded)
           << "Encoding " << testing::PrintToString(Locs);
@@ -51,7 +53,7 @@ void roundTrip(std::vector<SourceLocation::UIntTy> Locs,
   {
     LocSeq::State Seq;
     for (auto L : ActualEncoded) {
-      SourceLocation Loc = SourceLocationEncoding::decode(L, Seq);
+      SourceLocation Loc = SourceLocationEncoding::decode(L, Seq).first;
       DecodedEncoded.push_back(Loc.getRawEncoding());
     }
     ASSERT_EQ(DecodedEncoded, Locs)

>From ec77da428ca6d24b6dbf063f4c281fb661acfb20 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Mon, 15 Apr 2024 16:00:21 +0800
Subject: [PATCH 2/3] address comments

---
 clang/include/clang/Serialization/ASTReader.h | 18 +++----
 .../include/clang/Serialization/ModuleFile.h  | 10 +++-
 .../Serialization/SourceLocationEncoding.h    |  6 ++-
 clang/lib/Serialization/ASTReader.cpp         | 48 +++++++++----------
 clang/lib/Serialization/ASTReaderDecl.cpp     |  2 +-
 5 files changed, 44 insertions(+), 40 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 017c6b76a91495..22d1f499e88537 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -696,7 +696,7 @@ class ASTReader
   /// Mapping from global submodule IDs to the module file in which the
   /// submodule resides along with the offset that should be added to the
   /// global submodule ID to produce a local ID.
-  mutable GlobalSubmoduleMapType GlobalSubmoduleMap;
+  GlobalSubmoduleMapType GlobalSubmoduleMap;
 
   /// A set of hidden declarations.
   using HiddenNames = SmallVector<Decl *, 2>;
@@ -942,12 +942,6 @@ class ASTReader
   /// Sema tracks these to emit deferred diags.
   llvm::SmallSetVector<serialization::DeclID, 4> DeclsToCheckForDeferredDiags;
 
-  /// The module files imported by different module files. Indirectly imported
-  /// module files are included too. The information comes from
-  /// ReadModuleOffsetMap(ModuleFile&).
-  mutable llvm::DenseMap<ModuleFile *, llvm::SmallVector<ModuleFile *>>
-      ImportedModuleFiles;
-
 private:
   struct ImportedSubmodule {
     serialization::SubmoduleID ID;
@@ -2238,15 +2232,15 @@ class ASTReader
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadRawSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
+  SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
                                        LocSeq *Seq = nullptr) const {
     if (!MF.ModuleOffsetMap.empty())
       ReadModuleOffsetMap(MF);
 
     auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
-    ModuleFile *ModuleFileHomingLoc =
-        ModuleFileIndex ? ImportedModuleFiles[&MF][ModuleFileIndex - 1] : &MF;
-    return TranslateSourceLocation(*ModuleFileHomingLoc, Loc);
+    ModuleFile *OwningModuleFile =
+        ModuleFileIndex ? MF.DependentModules[ModuleFileIndex - 1] : &MF;
+    return TranslateSourceLocation(*OwningModuleFile, Loc);
   }
 
   /// Translate a source location from another module file's source
@@ -2267,7 +2261,7 @@ class ASTReader
   SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
                                     const RecordDataImpl &Record, unsigned &Idx,
                                     LocSeq *Seq = nullptr) {
-    return ReadRawSourceLocation(ModuleFile, Record[Idx++], Seq);
+    return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
   }
 
   /// Read a FileID.
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index ea24b44e5e411b..4c33dbc51b20e3 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -508,9 +508,17 @@ class ModuleFile {
   /// List of modules which depend on this module
   llvm::SetVector<ModuleFile *> ImportedBy;
 
-  /// List of modules which this module depends on
+  /// List of modules which this module directly imported
   llvm::SetVector<ModuleFile *> Imports;
 
+  /// List of modules which this modules dependent on. Different
+  /// from `Imports`, this includes indirectly imported modules too.
+  /// The order of DependentModules is significant. It should keep
+  /// the same order with that module file manager when we write
+  /// the current module file. The value of the member will be initialized
+  /// in `ASTReader::ReadModuleOffsetMap`.
+  llvm::SmallVector<ModuleFile *, 16> DependentModules;
+
   /// Determine whether this module was directly imported at
   /// any point during translation.
   bool isDirectlyImported() const { return DirectlyImported; }
diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h
index c7843a0909c095..8e0959146fac6a 100644
--- a/clang/include/clang/Serialization/SourceLocationEncoding.h
+++ b/clang/include/clang/Serialization/SourceLocationEncoding.h
@@ -32,6 +32,7 @@
 
 #include <climits>
 #include "clang/Basic/SourceLocation.h"
+#include "llvm/Support/MathExtras.h"
 
 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
@@ -177,7 +178,8 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
   RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
   assert(Encoded < ((RawLocEncoding)1 << 32));
 
-  assert(BaseModuleFileIndex < ((RawLocEncoding)1 << 32));
+  // 16 bits should be sufficient to store the module file index.
+  assert(BaseModuleFileIndex < (1 << 16));
   Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
   return Encoded;
 }
@@ -191,7 +193,7 @@ SourceLocationEncoding::decode(RawLocEncoding Encoded,
              : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
             ModuleFileIndex};
 
-  Encoded &= ((RawLocEncoding)1 << 33) - 1;
+  Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
   SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
 
   return {Loc, ModuleFileIndex};
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0aa4b05ffefd29..c9d4e666b8647e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1647,7 +1647,7 @@ bool ASTReader::ReadSLocEntry(int ID) {
     if (!File)
       return true;
 
-    SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]);
+    SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
     if (IncludeLoc.isInvalid() && F->Kind != MK_MainFile) {
       // This is the module's main file.
       IncludeLoc = getImportLocation(F);
@@ -1689,7 +1689,7 @@ bool ASTReader::ReadSLocEntry(int ID) {
     unsigned Offset = Record[0];
     SrcMgr::CharacteristicKind
       FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
-    SourceLocation IncludeLoc = ReadRawSourceLocation(*F, Record[1]);
+    SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
     if (IncludeLoc.isInvalid() && F->isModule()) {
       IncludeLoc = getImportLocation(F);
     }
@@ -1709,9 +1709,9 @@ bool ASTReader::ReadSLocEntry(int ID) {
 
   case SM_SLOC_EXPANSION_ENTRY: {
     LocSeq::State Seq;
-    SourceLocation SpellingLoc = ReadRawSourceLocation(*F, Record[1], Seq);
-    SourceLocation ExpansionBegin = ReadRawSourceLocation(*F, Record[2], Seq);
-    SourceLocation ExpansionEnd = ReadRawSourceLocation(*F, Record[3], Seq);
+    SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
+    SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
+    SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
     SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
                                  Record[5], Record[4], ID,
                                  BaseOffset + Record[0]);
@@ -3938,7 +3938,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       if (Record.size() != 1)
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma optimize record");
-      OptimizeOffPragmaLocation = ReadRawSourceLocation(F, Record[0]);
+      OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
       break;
 
     case MSSTRUCT_PRAGMA_OPTIONS:
@@ -3954,7 +3954,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
             std::errc::illegal_byte_sequence,
             "invalid pragma pointers to members record");
       PragmaMSPointersToMembersState = Record[0];
-      PointersToMembersPragmaLocation = ReadRawSourceLocation(F, Record[1]);
+      PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
       break;
 
     case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES:
@@ -3975,7 +3975,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma pack record");
       PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]);
-      PragmaAlignPackCurrentLocation = ReadRawSourceLocation(F, Record[1]);
+      PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]);
       unsigned NumStackEntries = Record[2];
       unsigned Idx = 3;
       // Reset the stack when importing a new module.
@@ -3983,8 +3983,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       for (unsigned I = 0; I < NumStackEntries; ++I) {
         PragmaAlignPackStackEntry Entry;
         Entry.Value = ReadAlignPackInfo(Record[Idx++]);
-        Entry.Location = ReadRawSourceLocation(F, Record[Idx++]);
-        Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]);
+        Entry.Location = ReadSourceLocation(F, Record[Idx++]);
+        Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
         PragmaAlignPackStrings.push_back(ReadString(Record, Idx));
         Entry.SlotLabel = PragmaAlignPackStrings.back();
         PragmaAlignPackStack.push_back(Entry);
@@ -3997,7 +3997,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         return llvm::createStringError(std::errc::illegal_byte_sequence,
                                        "invalid pragma float control record");
       FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]);
-      FpPragmaCurrentLocation = ReadRawSourceLocation(F, Record[1]);
+      FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]);
       unsigned NumStackEntries = Record[2];
       unsigned Idx = 3;
       // Reset the stack when importing a new module.
@@ -4005,8 +4005,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       for (unsigned I = 0; I < NumStackEntries; ++I) {
         FpPragmaStackEntry Entry;
         Entry.Value = FPOptionsOverride::getFromOpaqueInt(Record[Idx++]);
-        Entry.Location = ReadRawSourceLocation(F, Record[Idx++]);
-        Entry.PushLocation = ReadRawSourceLocation(F, Record[Idx++]);
+        Entry.Location = ReadSourceLocation(F, Record[Idx++]);
+        Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
         FpPragmaStrings.push_back(ReadString(Record, Idx));
         Entry.SlotLabel = FpPragmaStrings.back();
         FpPragmaStack.push_back(Entry);
@@ -4039,7 +4039,7 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   RemapBuilder DeclRemap(F.DeclRemap);
   RemapBuilder TypeRemap(F.TypeRemap);
 
-  auto &ImportedModuleVector = ImportedModuleFiles[&F];
+  auto &ImportedModuleVector = F.DependentModules;
   assert(ImportedModuleVector.empty());
 
   while (Data < DataEnd) {
@@ -4059,7 +4059,7 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
                           : ModuleMgr.lookupByFileName(Name));
     if (!OM) {
       std::string Msg =
-          "SourceLocation remap refers to unknown module, cannot find ";
+          "cannot find module ";
       Msg.append(std::string(Name));
       Error(Msg);
       return;
@@ -5750,7 +5750,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
       SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
-      SourceLocation DefinitionLoc = ReadRawSourceLocation(F, Record[Idx++]);
+      SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
       bool IsFramework = Record[Idx++];
       bool IsExplicit = Record[Idx++];
       bool IsSystem = Record[Idx++];
@@ -6225,8 +6225,8 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
   unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
   assert(LocalIndex < M->NumPreprocessedSkippedRanges);
   PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
-  SourceRange Range(ReadRawSourceLocation(*M, RawRange.getBegin()),
-                    ReadRawSourceLocation(*M, RawRange.getEnd()));
+  SourceRange Range(ReadSourceLocation(*M, RawRange.getBegin()),
+                    ReadSourceLocation(*M, RawRange.getEnd()));
   assert(Range.isValid());
   return Range;
 }
@@ -6262,8 +6262,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
     return nullptr;
 
   // Read the record.
-  SourceRange Range(ReadRawSourceLocation(M, PPOffs.getBegin()),
-                    ReadRawSourceLocation(M, PPOffs.getEnd()));
+  SourceRange Range(ReadSourceLocation(M, PPOffs.getBegin()),
+                    ReadSourceLocation(M, PPOffs.getEnd()));
   PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
   StringRef Blob;
   RecordData Record;
@@ -6375,7 +6375,7 @@ struct PPEntityComp {
   }
 
   SourceLocation getLoc(const PPEntityOffset &PPE) const {
-    return Reader.ReadRawSourceLocation(M, PPE.getBegin());
+    return Reader.ReadSourceLocation(M, PPE.getBegin());
   }
 };
 
@@ -6419,7 +6419,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
       PPI = First;
       std::advance(PPI, Half);
       if (SourceMgr.isBeforeInTranslationUnit(
-              ReadRawSourceLocation(M, PPI->getEnd()), Loc)) {
+              ReadSourceLocation(M, PPI->getEnd()), Loc)) {
         First = PPI;
         ++First;
         Count = Count - Half - 1;
@@ -6460,7 +6460,7 @@ std::optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
   unsigned LocalIndex = PPInfo.second;
   const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
 
-  SourceLocation Loc = ReadRawSourceLocation(M, PPOffs.getBegin());
+  SourceLocation Loc = ReadSourceLocation(M, PPOffs.getBegin());
   if (Loc.isInvalid())
     return false;
 
@@ -6626,7 +6626,7 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
     // Read the final state.
     assert(Idx < Record.size() &&
            "Invalid data, missing final pragma diagnostic state");
-    SourceLocation CurStateLoc = ReadRawSourceLocation(F, Record[Idx++]);
+    SourceLocation CurStateLoc = ReadSourceLocation(F, Record[Idx++]);
     auto *CurState = ReadDiagState(*FirstState, false);
 
     if (!F.isModule()) {
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index f3192b3d67508c..fb558728b32591 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3246,7 +3246,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
   ModuleFile *M = I->second;
   const DeclOffset &DOffs =
       M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
-  Loc = ReadRawSourceLocation(*M, DOffs.getRawLoc());
+  Loc = ReadSourceLocation(*M, DOffs.getRawLoc());
   return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
 }
 

>From bae4c222aa3c2b244f547290a058b28417375b16 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 16 Apr 2024 10:32:34 +0800
Subject: [PATCH 3/3] address comments

---
 clang/include/clang/Serialization/ASTReader.h       | 13 +++++++++----
 .../clang/Serialization/SourceLocationEncoding.h    |  1 -
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 22d1f499e88537..019802e89b48ef 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2239,7 +2239,10 @@ class ASTReader
 
     auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
     ModuleFile *OwningModuleFile =
-        ModuleFileIndex ? MF.DependentModules[ModuleFileIndex - 1] : &MF;
+        ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1];
+
+    assert(!SourceMgr.isLoadedSourceLocation(Loc) && "Run out source location space");
+
     return TranslateSourceLocation(*OwningModuleFile, Loc);
   }
 
@@ -2250,9 +2253,11 @@ class ASTReader
     if (Loc.isInvalid())
       return Loc;
 
-    // It implies that the Loc is already translated.
-    if (SourceMgr.isLoadedSourceLocation(Loc))
-      return Loc;
+    // FIXME: TranslateSourceLocation is not re-enterable. It is problematic
+    // to call TranslateSourceLocation on a translated source location.
+    // We either need a method to know whether or not a source location is translated
+    // or refactor the code to make it clear that TranslateSourceLocation won't
+    // be called with translated source location.
 
     return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
   }
diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h
index 8e0959146fac6a..ce01893eb141f8 100644
--- a/clang/include/clang/Serialization/SourceLocationEncoding.h
+++ b/clang/include/clang/Serialization/SourceLocationEncoding.h
@@ -176,7 +176,6 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
   assert(Loc.getOffset() >= BaseOffset);
   Loc = Loc.getLocWithOffset(-BaseOffset);
   RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
-  assert(Encoded < ((RawLocEncoding)1 << 32));
 
   // 16 bits should be sufficient to store the module file index.
   assert(BaseModuleFileIndex < (1 << 16));



More information about the cfe-commits mailing list