[clang] [Serialization] No transitive identifier change (PR #92085)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 4 00:41:00 PDT 2024


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/92085

>From e8f756ec7f8ea7e5bf18cc122a965fb2f258fd15 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 14 May 2024 15:33:12 +0800
Subject: [PATCH] [Serialization] No transitive identifier change

---
 .../clang/Lex/ExternalPreprocessorSource.h    |  54 ++++++++-
 clang/include/clang/Lex/HeaderSearch.h        |  12 +-
 .../include/clang/Serialization/ASTBitCodes.h |   2 +-
 clang/include/clang/Serialization/ASTReader.h |  19 ++-
 .../include/clang/Serialization/ModuleFile.h  |   3 -
 clang/lib/Lex/HeaderSearch.cpp                |  33 +++---
 clang/lib/Serialization/ASTReader.cpp         |  98 ++++++++--------
 clang/lib/Serialization/ASTWriter.cpp         |  63 ++++++----
 clang/lib/Serialization/GlobalModuleIndex.cpp |   3 +-
 clang/lib/Serialization/ModuleFile.cpp        |   1 -
 .../no-transitive-identifier-change.cppm      | 110 ++++++++++++++++++
 11 files changed, 286 insertions(+), 112 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-identifier-change.cppm

diff --git a/clang/include/clang/Lex/ExternalPreprocessorSource.h b/clang/include/clang/Lex/ExternalPreprocessorSource.h
index 6775841860373..48429948dbffe 100644
--- a/clang/include/clang/Lex/ExternalPreprocessorSource.h
+++ b/clang/include/clang/Lex/ExternalPreprocessorSource.h
@@ -36,12 +36,64 @@ class ExternalPreprocessorSource {
   /// Return the identifier associated with the given ID number.
   ///
   /// The ID 0 is associated with the NULL identifier.
-  virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
+  virtual IdentifierInfo *GetIdentifier(uint64_t ID) = 0;
 
   /// Map a module ID to a module.
   virtual Module *getModule(unsigned ModuleID) = 0;
 };
 
+// Either a pointer to an IdentifierInfo of the controlling macro or the ID
+// number of the controlling macro.
+class LazyIdentifierInfoPtr {
+  // If the low bit is clear, a pointer to the IdentifierInfo. If the low
+  // bit is set, the upper 63 bits are the ID number.
+  mutable uint64_t Ptr = 0;
+
+public:
+  LazyIdentifierInfoPtr() = default;
+
+  explicit LazyIdentifierInfoPtr(const IdentifierInfo *Ptr)
+      : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+
+  explicit LazyIdentifierInfoPtr(uint64_t ID) : Ptr((ID << 1) | 0x01) {
+    assert((ID << 1 >> 1) == ID && "ID must require < 63 bits");
+    if (ID == 0)
+      Ptr = 0;
+  }
+
+  LazyIdentifierInfoPtr &operator=(const IdentifierInfo *Ptr) {
+    this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+    return *this;
+  }
+
+  LazyIdentifierInfoPtr &operator=(uint64_t ID) {
+    assert((ID << 1 >> 1) == ID && "IDs must require < 63 bits");
+    if (ID == 0)
+      Ptr = 0;
+    else
+      Ptr = (ID << 1) | 0x01;
+
+    return *this;
+  }
+
+  /// Whether this pointer is non-NULL.
+  ///
+  /// This operation does not require the AST node to be deserialized.
+  bool isValid() const { return Ptr != 0; }
+
+  /// Whether this pointer is currently stored as ID.
+  bool isID() const { return Ptr & 0x01; }
+
+  IdentifierInfo *getPtr() const {
+    assert(!isID());
+    return reinterpret_cast<IdentifierInfo *>(Ptr);
+  }
+
+  uint64_t getID() const {
+    assert(isID());
+    return Ptr >> 1;
+  }
+};
 }
 
 #endif
diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 5ac63dddd4d4e..65700b8f9dc11 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/DirectoryLookup.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderMap.h"
 #include "clang/Lex/ModuleMap.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -119,13 +120,6 @@ struct HeaderFileInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsValid : 1;
 
-  /// The ID number of the controlling macro.
-  ///
-  /// This ID number will be non-zero when there is a controlling
-  /// macro whose IdentifierInfo may not yet have been loaded from
-  /// external storage.
-  unsigned ControllingMacroID = 0;
-
   /// If this file has a \#ifndef XXX (or equivalent) guard that
   /// protects the entire contents of the file, this is the identifier
   /// for the macro that controls whether or not it has any effect.
@@ -134,7 +128,7 @@ struct HeaderFileInfo {
   /// the controlling macro of this header, since
   /// getControllingMacro() is able to load a controlling macro from
   /// external storage.
-  const IdentifierInfo *ControllingMacro = nullptr;
+  LazyIdentifierInfoPtr LazyControllingMacro;
 
   /// If this header came from a framework include, this is the name
   /// of the framework.
@@ -580,7 +574,7 @@ class HeaderSearch {
   /// no-op \#includes.
   void SetFileControllingMacro(FileEntryRef File,
                                const IdentifierInfo *ControllingMacro) {
-    getFileInfo(File).ControllingMacro = ControllingMacro;
+    getFileInfo(File).LazyControllingMacro = ControllingMacro;
   }
 
   /// Determine whether this file is intended to be safe from
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 902c4470650c5..03bac9be2bf3d 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -59,7 +59,7 @@ const unsigned VERSION_MINOR = 1;
 ///
 /// The ID numbers of identifiers are consecutive (in order of discovery)
 /// and start at 1. 0 is reserved for NULL.
-using IdentifierID = uint32_t;
+using IdentifierID = uint64_t;
 
 /// The number of predefined identifier IDs.
 const unsigned int NUM_PREDEF_IDENT_IDS = 1;
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 0a9006223dcbd..3f38a08f0da3a 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -660,14 +660,6 @@ class ASTReader
   /// been loaded.
   std::vector<IdentifierInfo *> IdentifiersLoaded;
 
-  using GlobalIdentifierMapType =
-      ContinuousRangeMap<serialization::IdentifierID, ModuleFile *, 4>;
-
-  /// Mapping from global identifier IDs to the module in which the
-  /// identifier resides along with the offset that should be added to the
-  /// global identifier ID to produce a local ID.
-  GlobalIdentifierMapType GlobalIdentifierMap;
-
   /// A vector containing macros that have already been
   /// loaded.
   ///
@@ -1539,6 +1531,11 @@ class ASTReader
   /// Translate a \param GlobalDeclID to the index of DeclsLoaded array.
   unsigned translateGlobalDeclIDToIndex(GlobalDeclID ID) const;
 
+  /// Translate an \param IdentifierID ID to the index of IdentifiersLoaded
+  /// array and the corresponding module file.
+  std::pair<ModuleFile *, unsigned>
+  translateIdentifierIDToIndex(serialization::IdentifierID ID) const;
+
 public:
   /// Load the AST file and validate its contents against the given
   /// Preprocessor.
@@ -2123,7 +2120,7 @@ class ASTReader
   /// Load a selector from disk, registering its ID if it exists.
   void LoadSelector(Selector Sel);
 
-  void SetIdentifierInfo(unsigned ID, IdentifierInfo *II);
+  void SetIdentifierInfo(serialization::IdentifierID ID, IdentifierInfo *II);
   void SetGloballyVisibleDecls(IdentifierInfo *II,
                                const SmallVectorImpl<GlobalDeclID> &DeclIDs,
                                SmallVectorImpl<Decl *> *Decls = nullptr);
@@ -2150,10 +2147,10 @@ class ASTReader
     return DecodeIdentifierInfo(ID);
   }
 
-  IdentifierInfo *getLocalIdentifier(ModuleFile &M, unsigned LocalID);
+  IdentifierInfo *getLocalIdentifier(ModuleFile &M, uint64_t LocalID);
 
   serialization::IdentifierID getGlobalIdentifierID(ModuleFile &M,
-                                                    unsigned LocalID);
+                                                    uint64_t LocalID);
 
   void resolvePendingMacro(IdentifierInfo *II, const PendingMacroInfo &PMInfo);
 
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 56193d44dd6f3..3787f4eeb8a8b 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -310,9 +310,6 @@ class ModuleFile {
   /// Base identifier ID for identifiers local to this module.
   serialization::IdentifierID BaseIdentifierID = 0;
 
-  /// Remapping table for identifier IDs in this module.
-  ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
-
   /// Actual data for the on-disk hash table of identifiers.
   ///
   /// This pointer points into a memory buffer, where the on-disk hash
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 574723b33866a..addcb6ce2b635 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -60,19 +60,21 @@ ALWAYS_ENABLED_STATISTIC(NumSubFrameworkLookups,
 
 const IdentifierInfo *
 HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) {
-  if (ControllingMacro) {
-    if (ControllingMacro->isOutOfDate()) {
-      assert(External && "We must have an external source if we have a "
-                         "controlling macro that is out of date.");
-      External->updateOutOfDateIdentifier(*ControllingMacro);
-    }
-    return ControllingMacro;
-  }
+  if (LazyControllingMacro.isID()) {
+    if (!External)
+      return nullptr;
 
-  if (!ControllingMacroID || !External)
-    return nullptr;
+    LazyControllingMacro =
+        External->GetIdentifier(LazyControllingMacro.getID());
+    return LazyControllingMacro.getPtr();
+  }
 
-  ControllingMacro = External->GetIdentifier(ControllingMacroID);
+  IdentifierInfo *ControllingMacro = LazyControllingMacro.getPtr();
+  if (ControllingMacro && ControllingMacro->isOutOfDate()) {
+    assert(External && "We must have an external source if we have a "
+                       "controlling macro that is out of date.");
+    External->updateOutOfDateIdentifier(*ControllingMacro);
+  }
   return ControllingMacro;
 }
 
@@ -1341,10 +1343,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
   mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader,
                                 OtherHFI.isTextualModuleHeader);
 
-  if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
-    HFI.ControllingMacro = OtherHFI.ControllingMacro;
-    HFI.ControllingMacroID = OtherHFI.ControllingMacroID;
-  }
+  if (!HFI.LazyControllingMacro.isValid())
+    HFI.LazyControllingMacro = OtherHFI.LazyControllingMacro;
 
   HFI.DirInfo = OtherHFI.DirInfo;
   HFI.External = (!HFI.IsValid || HFI.External);
@@ -1419,8 +1419,7 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {
   // once. Note that we dor't check for #import, because that's not a property
   // of the file itself.
   if (auto *HFI = getExistingFileInfo(File))
-    return HFI->isPragmaOnce || HFI->ControllingMacro ||
-           HFI->ControllingMacroID;
+    return HFI->isPragmaOnce || HFI->LazyControllingMacro.isValid();
   return false;
 }
 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index a5a747f68f014..29a7fcb2cb507 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -920,7 +920,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) {
   SelectorTable &SelTable = Reader.getContext().Selectors;
   unsigned N = endian::readNext<uint16_t, llvm::endianness::little>(d);
   const IdentifierInfo *FirstII = Reader.getLocalIdentifier(
-      F, endian::readNext<uint32_t, llvm::endianness::little>(d));
+      F, endian::readNext<IdentifierID, llvm::endianness::little>(d));
   if (N == 0)
     return SelTable.getNullarySelector(FirstII);
   else if (N == 1)
@@ -930,7 +930,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) {
   Args.push_back(FirstII);
   for (unsigned I = 1; I != N; ++I)
     Args.push_back(Reader.getLocalIdentifier(
-        F, endian::readNext<uint32_t, llvm::endianness::little>(d)));
+        F, endian::readNext<IdentifierID, llvm::endianness::little>(d)));
 
   return SelTable.getSelector(N, Args.data());
 }
@@ -1011,7 +1011,8 @@ static bool readBit(unsigned &Bits) {
 IdentifierID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) {
   using namespace llvm::support;
 
-  unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d);
+  IdentifierID RawID =
+      endian::readNext<IdentifierID, llvm::endianness::little>(d);
   return Reader.getGlobalIdentifierID(F, RawID >> 1);
 }
 
@@ -1029,9 +1030,12 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
                                                    unsigned DataLen) {
   using namespace llvm::support;
 
-  unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d);
+  IdentifierID RawID =
+      endian::readNext<IdentifierID, llvm::endianness::little>(d);
   bool IsInteresting = RawID & 0x01;
 
+  DataLen -= sizeof(IdentifierID);
+
   // Wipe out the "is interesting" bit.
   RawID = RawID >> 1;
 
@@ -1062,7 +1066,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
   bool HadMacroDefinition = readBit(Bits);
 
   assert(Bits == 0 && "Extra bits in the identifier?");
-  DataLen -= 8;
+  DataLen -= sizeof(uint16_t) * 2;
 
   // Set or check the various bits in the IdentifierInfo structure.
   // Token IDs are read-only.
@@ -1188,7 +1192,7 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) {
   case DeclarationName::CXXLiteralOperatorName:
   case DeclarationName::CXXDeductionGuideName:
     Data = (uint64_t)Reader.getLocalIdentifier(
-        F, endian::readNext<uint32_t, llvm::endianness::little>(d));
+        F, endian::readNext<IdentifierID, llvm::endianness::little>(d));
     break;
   case DeclarationName::ObjCZeroArgSelector:
   case DeclarationName::ObjCOneArgSelector:
@@ -2054,8 +2058,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
   HFI.DirInfo = (Flags >> 1) & 0x07;
   HFI.IndexHeaderMapHeader = Flags & 0x01;
-  HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
-      M, endian::readNext<uint32_t, llvm::endianness::little>(d));
+  HFI.LazyControllingMacro = Reader.getGlobalIdentifierID(
+      M, endian::readNext<IdentifierID, llvm::endianness::little>(d));
   if (unsigned FrameworkOffset =
           endian::readNext<uint32_t, llvm::endianness::little>(d)) {
     // The framework offset is 1 greater than the actual offset,
@@ -3429,24 +3433,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
             "duplicate IDENTIFIER_OFFSET record in AST file");
       F.IdentifierOffsets = (const uint32_t *)Blob.data();
       F.LocalNumIdentifiers = Record[0];
-      unsigned LocalBaseIdentifierID = Record[1];
       F.BaseIdentifierID = getTotalNumIdentifiers();
 
-      if (F.LocalNumIdentifiers > 0) {
-        // Introduce the global -> local mapping for identifiers within this
-        // module.
-        GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1,
-                                                  &F));
-
-        // Introduce the local -> global mapping for identifiers within this
-        // module.
-        F.IdentifierRemap.insertOrReplace(
-          std::make_pair(LocalBaseIdentifierID,
-                         F.BaseIdentifierID - LocalBaseIdentifierID));
-
+      if (F.LocalNumIdentifiers > 0)
         IdentifiersLoaded.resize(IdentifiersLoaded.size()
                                  + F.LocalNumIdentifiers);
-      }
       break;
     }
 
@@ -4038,7 +4029,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   F.ModuleOffsetMap = StringRef();
 
   using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
-  RemapBuilder IdentifierRemap(F.IdentifierRemap);
   RemapBuilder MacroRemap(F.MacroRemap);
   RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
   RemapBuilder SubmoduleRemap(F.SubmoduleRemap);
@@ -4071,8 +4061,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
 
     ImportedModuleVector.push_back(OM);
 
-    uint32_t IdentifierIDOffset =
-        endian::readNext<uint32_t, llvm::endianness::little>(Data);
     uint32_t MacroIDOffset =
         endian::readNext<uint32_t, llvm::endianness::little>(Data);
     uint32_t PreprocessedEntityIDOffset =
@@ -4092,7 +4080,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
                                     static_cast<int>(BaseOffset - Offset)));
     };
 
-    mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
     mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
     mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
               PreprocessedEntityRemap);
@@ -8181,7 +8168,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() {
   dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap);
   dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap);
   dumpModuleIDMap("Global type map", GlobalTypeMap);
-  dumpModuleIDMap("Global identifier map", GlobalIdentifierMap);
   dumpModuleIDMap("Global macro map", GlobalMacroMap);
   dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap);
   dumpModuleIDMap("Global selector map", GlobalSelectorMap);
@@ -8822,8 +8808,9 @@ void ASTReader::LoadSelector(Selector Sel) {
 
 void ASTReader::SetIdentifierInfo(IdentifierID ID, IdentifierInfo *II) {
   assert(ID && "Non-zero identifier ID required");
-  assert(ID <= IdentifiersLoaded.size() && "identifier ID out of range");
-  IdentifiersLoaded[ID - 1] = II;
+  unsigned Index = translateIdentifierIDToIndex(ID).second;
+  assert(Index < IdentifiersLoaded.size() && "identifier ID out of range");
+  IdentifiersLoaded[Index] = II;
   if (DeserializationListener)
     DeserializationListener->IdentifierRead(ID, II);
 }
@@ -8876,6 +8863,22 @@ void ASTReader::SetGloballyVisibleDecls(
   }
 }
 
+std::pair<ModuleFile *, unsigned>
+ASTReader::translateIdentifierIDToIndex(IdentifierID ID) const {
+  if (ID == 0)
+    return {nullptr, 0};
+
+  unsigned ModuleFileIndex = ID >> 32;
+  unsigned LocalID = ID & llvm::maskTrailingOnes<IdentifierID>(32);
+
+  assert(ModuleFileIndex && "not translating loaded IdentifierID?");
+  assert(getModuleManager().size() > ModuleFileIndex - 1);
+
+  ModuleFile &MF = getModuleManager()[ModuleFileIndex - 1];
+  assert(LocalID < MF.LocalNumIdentifiers);
+  return {&MF, MF.BaseIdentifierID + LocalID};
+}
+
 IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) {
   if (ID == 0)
     return nullptr;
@@ -8885,45 +8888,48 @@ IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) {
     return nullptr;
   }
 
-  ID -= 1;
-  if (!IdentifiersLoaded[ID]) {
-    GlobalIdentifierMapType::iterator I = GlobalIdentifierMap.find(ID + 1);
-    assert(I != GlobalIdentifierMap.end() && "Corrupted global identifier map");
-    ModuleFile *M = I->second;
-    unsigned Index = ID - M->BaseIdentifierID;
+  auto [M, Index] = translateIdentifierIDToIndex(ID);
+  if (!IdentifiersLoaded[Index]) {
+    assert(M != nullptr && "Untranslated Identifier ID?");
+    assert(Index >= M->BaseIdentifierID);
+    unsigned LocalIndex = Index - M->BaseIdentifierID;
     const unsigned char *Data =
-        M->IdentifierTableData + M->IdentifierOffsets[Index];
+        M->IdentifierTableData + M->IdentifierOffsets[LocalIndex];
 
     ASTIdentifierLookupTrait Trait(*this, *M);
     auto KeyDataLen = Trait.ReadKeyDataLength(Data);
     auto Key = Trait.ReadKey(Data, KeyDataLen.first);
     auto &II = PP.getIdentifierTable().get(Key);
-    IdentifiersLoaded[ID] = &II;
+    IdentifiersLoaded[Index] = &II;
     markIdentifierFromAST(*this,  II);
     if (DeserializationListener)
-      DeserializationListener->IdentifierRead(ID + 1, &II);
+      DeserializationListener->IdentifierRead(ID, &II);
   }
 
-  return IdentifiersLoaded[ID];
+  return IdentifiersLoaded[Index];
 }
 
-IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) {
+IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, uint64_t LocalID) {
   return DecodeIdentifierInfo(getGlobalIdentifierID(M, LocalID));
 }
 
-IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) {
+IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, uint64_t LocalID) {
   if (LocalID < NUM_PREDEF_IDENT_IDS)
     return LocalID;
 
   if (!M.ModuleOffsetMap.empty())
     ReadModuleOffsetMap(M);
 
-  ContinuousRangeMap<uint32_t, int, 2>::iterator I
-    = M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS);
-  assert(I != M.IdentifierRemap.end()
-         && "Invalid index into identifier index remap");
+  unsigned ModuleFileIndex = LocalID >> 32;
+  LocalID &= llvm::maskTrailingOnes<IdentifierID>(32);
+  ModuleFile *MF =
+      ModuleFileIndex ? M.TransitiveImports[ModuleFileIndex - 1] : &M;
+  assert(MF && "malformed identifier ID encoding?");
 
-  return LocalID + I->second;
+  if (!ModuleFileIndex)
+    LocalID -= NUM_PREDEF_IDENT_IDS;
+
+  return ((IdentifierID)(MF->Index + 1) << 32) | LocalID;
 }
 
 MacroInfo *ASTReader::getMacro(MacroID ID) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index b8b613db712f4..dd6c135d54b46 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1991,7 +1991,7 @@ namespace {
     std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
-      unsigned DataLen = 1 + 4 + 4;
+      unsigned DataLen = 1 + sizeof(IdentifierID) + 4;
       for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
@@ -2026,10 +2026,11 @@ namespace {
                           | Data.HFI.IndexHeaderMapHeader;
       LE.write<uint8_t>(Flags);
 
-      if (!Data.HFI.ControllingMacro)
-        LE.write<uint32_t>(Data.HFI.ControllingMacroID);
+      if (Data.HFI.LazyControllingMacro.isID())
+        LE.write<IdentifierID>(Data.HFI.LazyControllingMacro.getID());
       else
-        LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro));
+        LE.write<IdentifierID>(
+            Writer.getIdentifierRef(Data.HFI.LazyControllingMacro.getPtr()));
 
       unsigned Offset = 0;
       if (!Data.HFI.Framework.empty()) {
@@ -3452,7 +3453,9 @@ class ASTMethodPoolTrait {
   std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, Selector Sel,
                       data_type_ref Methods) {
-    unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4);
+    unsigned KeyLen =
+        2 + (Sel.getNumArgs() ? Sel.getNumArgs() * sizeof(IdentifierID)
+                              : sizeof(IdentifierID));
     unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts
     for (const ObjCMethodList *Method = &Methods.Instance; Method;
          Method = Method->getNext())
@@ -3477,7 +3480,7 @@ class ASTMethodPoolTrait {
     if (N == 0)
       N = 1;
     for (unsigned I = 0; I != N; ++I)
-      LE.write<uint32_t>(
+      LE.write<IdentifierID>(
           Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I)));
   }
 
@@ -3788,7 +3791,7 @@ class ASTIdentifierTableTrait {
       InterestingIdentifierOffsets->push_back(Out.tell());
 
     unsigned KeyLen = II->getLength() + 1;
-    unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
+    unsigned DataLen = sizeof(IdentifierID); // bytes for the persistent ID << 1
     if (isInterestingIdentifier(II, MacroOffset)) {
       DataLen += 2; // 2 bytes for builtin ID
       DataLen += 2; // 2 bytes for flags
@@ -3814,11 +3817,11 @@ class ASTIdentifierTableTrait {
 
     auto MacroOffset = Writer.getMacroDirectivesOffset(II);
     if (!isInterestingIdentifier(II, MacroOffset)) {
-      LE.write<uint32_t>(ID << 1);
+      LE.write<IdentifierID>(ID << 1);
       return;
     }
 
-    LE.write<uint32_t>((ID << 1) | 0x01);
+    LE.write<IdentifierID>((ID << 1) | 0x01);
     uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID();
     assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader.");
     LE.write<uint16_t>(Bits);
@@ -3851,6 +3854,10 @@ class ASTIdentifierTableTrait {
 
 } // namespace
 
+/// If the \param IdentifierID ID is a local Identifier ID. If the higher
+/// bits of ID is 0, it implies that the ID doesn't come from AST files.
+static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); }
+
 /// Write the identifier table into the AST file.
 ///
 /// The identifier table consists of a blob containing string data
@@ -3895,7 +3902,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
 
       // Write out identifiers if either the ID is local or the identifier has
       // changed since it was loaded.
-      if (ID >= FirstIdentID || II->hasChangedSinceDeserialization() ||
+      if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() ||
           (Trait.needDecls() &&
            II->hasFETokenInfoChangedSinceDeserialization()))
         Generator.insert(II, ID, Trait);
@@ -3929,7 +3936,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
   auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
-  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
   unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
 
@@ -3939,8 +3945,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
 #endif
 
   RecordData::value_type Record[] = {IDENTIFIER_OFFSET,
-                                     IdentifierOffsets.size(),
-                                     FirstIdentID - NUM_PREDEF_IDENT_IDS};
+                                     IdentifierOffsets.size()};
   Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
                             bytes(IdentifierOffsets));
 
@@ -4030,11 +4035,13 @@ class ASTDeclContextNameLookupTrait {
     unsigned KeyLen = 1;
     switch (Name.getKind()) {
     case DeclarationName::Identifier:
+    case DeclarationName::CXXLiteralOperatorName:
+    case DeclarationName::CXXDeductionGuideName:
+      KeyLen += sizeof(IdentifierID);
+      break;
     case DeclarationName::ObjCZeroArgSelector:
     case DeclarationName::ObjCOneArgSelector:
     case DeclarationName::ObjCMultiArgSelector:
-    case DeclarationName::CXXLiteralOperatorName:
-    case DeclarationName::CXXDeductionGuideName:
       KeyLen += 4;
       break;
     case DeclarationName::CXXOperatorName:
@@ -4062,7 +4069,7 @@ class ASTDeclContextNameLookupTrait {
     case DeclarationName::Identifier:
     case DeclarationName::CXXLiteralOperatorName:
     case DeclarationName::CXXDeductionGuideName:
-      LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier()));
+      LE.write<IdentifierID>(Writer.getIdentifierRef(Name.getIdentifier()));
       return;
     case DeclarationName::ObjCZeroArgSelector:
     case DeclarationName::ObjCOneArgSelector:
@@ -4780,8 +4787,15 @@ void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
   IdentifierID ID = IdentifierIDs[II];
   // Only store offsets new to this AST file. Other identifier names are looked
   // up earlier in the chain and thus don't need an offset.
-  if (ID >= FirstIdentID)
-    IdentifierOffsets[ID - FirstIdentID] = Offset;
+  if (!isLocalIdentifierID(ID))
+    return;
+
+  // For local identifiers, the module file index must be 0.
+
+  assert(ID != 0);
+  ID -= NUM_PREDEF_IDENT_IDS;
+  assert(ID < IdentifierOffsets.size());
+  IdentifierOffsets[ID] = Offset;
 }
 
 /// Note that the selector Sel occurs at the given offset
@@ -5415,7 +5429,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
         // These values should be unique within a chain, since they will be read
         // as keys into ContinuousRangeMaps.
-        writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
         writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
         writeBaseIDOrNone(M.BasePreprocessedEntityID,
                           M.NumPreprocessedEntities);
@@ -6614,20 +6627,26 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) {
   // Note, this will get called multiple times, once one the reader starts up
   // and again each time it's done reading a PCH or module.
   FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes();
-  FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers();
   FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros();
   FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules();
   FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors();
   NextTypeID = FirstTypeID;
-  NextIdentID = FirstIdentID;
   NextMacroID = FirstMacroID;
   NextSelectorID = FirstSelectorID;
   NextSubmoduleID = FirstSubmoduleID;
 }
 
 void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) {
-  // Always keep the highest ID. See \p TypeRead() for more information.
   IdentifierID &StoredID = IdentifierIDs[II];
+  unsigned OriginalModuleFileIndex = StoredID >> 32;
+
+  // Always keep the local identifier ID. See \p TypeRead() for more
+  // information.
+  if (OriginalModuleFileIndex == 0 && StoredID)
+    return;
+
+  // Otherwise, keep the highest ID since the module file comes later has
+  // higher module file indexes.
   if (ID > StoredID)
     StoredID = ID;
 }
diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp
index f09ceb8d31620..1163943c5dffa 100644
--- a/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -510,7 +510,8 @@ namespace {
       // The first bit indicates whether this identifier is interesting.
       // That's all we care about.
       using namespace llvm::support;
-      unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d);
+      IdentifierID RawID =
+          endian::readNext<IdentifierID, llvm::endianness::little>(d);
       bool IsInteresting = RawID & 0x01;
       return std::make_pair(k, IsInteresting);
     }
diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp
index f64a59bd94891..7976c28b28671 100644
--- a/clang/lib/Serialization/ModuleFile.cpp
+++ b/clang/lib/Serialization/ModuleFile.cpp
@@ -62,7 +62,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() {
 
   llvm::errs() << "  Base identifier ID: " << BaseIdentifierID << '\n'
                << "  Number of identifiers: " << LocalNumIdentifiers << '\n';
-  dumpLocalRemap("Identifier ID local -> global map", IdentifierRemap);
 
   llvm::errs() << "  Base macro ID: " << BaseMacroID << '\n'
                << "  Number of macros: " << LocalNumMacros << '\n';
diff --git a/clang/test/Modules/no-transitive-identifier-change.cppm b/clang/test/Modules/no-transitive-identifier-change.cppm
new file mode 100644
index 0000000000000..97e8ac4444fdd
--- /dev/null
+++ b/clang/test/Modules/no-transitive-identifier-change.cppm
@@ -0,0 +1,110 @@
+// Testing that adding an new identifier in an unused module file won't change 
+// the BMI of the current module file.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm
+// RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \
+// RUN:     %t/m-partA.v1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm
+// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \
+// RUN:     -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm
+// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \
+// RUN:     -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm
+//
+// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \
+// RUN:     -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \
+// RUN:     -fmodule-file=m:partB=%t/m-partB.pcm
+// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \
+// RUN:     -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \
+// RUN:     -fmodule-file=m:partB=%t/m-partB.pcm
+// Since useBOnly only uses partB from module M, the change in partA shouldn't affect
+// useBOnly.
+// RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null
+//
+// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.pcm \
+// RUN:     -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \
+// RUN:     -fmodule-file=m:partB=%t/m-partB.pcm
+// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.v1.pcm \
+// RUN:     -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \
+// RUN:     -fmodule-file=m:partB=%t/m-partB.pcm
+// useAOnly should differ
+// RUN: not diff %t/useAOnly.pcm %t/useAOnly.v1.pcm &> /dev/null
+
+//--- m-partA.cppm
+export module m:partA;
+
+export inline int getA() {
+    return 43;
+}
+
+export class A {
+public:
+    int getMem();
+};
+
+export template <typename T>
+class ATempl {
+public:
+    T getT();
+};
+
+//--- m-partA.v1.cppm
+export module m:partA;
+
+export inline int getA() {
+    return 43;
+}
+
+// Now we add a new declaration without introducing a new type.
+// The consuming module which didn't use m:partA completely is expected to be
+// not changed.
+export inline int getA2() {
+    return 88;
+}
+
+export class A {
+public:
+    int getMem();
+    // Now we add a new declaration without introducing a new type.
+    // The consuming module which didn't use m:partA completely is expected to be
+    // not changed.
+    int getMem2();
+};
+
+export template <typename T>
+class ATempl {
+public:
+    T getT();
+    // Add a new declaration without introducing a new type.
+    T getT2();
+};
+
+//--- m-partB.cppm
+export module m:partB;
+
+export inline int getB() {
+    return 430;
+}
+
+//--- m.cppm
+export module m;
+export import :partA;
+export import :partB;
+
+//--- useBOnly.cppm
+export module useBOnly;
+import m;
+
+export inline int get() {
+    return getB();
+}
+
+//--- useAOnly.cppm
+export module useAOnly;
+import m;
+
+export inline int get() {
+    return getA();
+}



More information about the cfe-commits mailing list