[clang] [clang][modules] Deserialize submodule lazily (PR #194968)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 29 15:59:26 PDT 2026
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/194968
>From 21a9fac21c7539225d8936c832100096a824bd46 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 29 Apr 2026 15:58:48 -0700
Subject: [PATCH 1/2] [clang][modules] Deserialize submodule lazily
---
clang/include/clang/Basic/Module.h | 78 ++++-
clang/include/clang/Lex/ModuleMap.h | 2 +-
clang/include/clang/Lex/Preprocessor.h | 2 +-
.../include/clang/Serialization/ASTBitCodes.h | 14 +-
clang/include/clang/Serialization/ASTReader.h | 34 +--
.../include/clang/Serialization/ModuleFile.h | 16 +
clang/lib/Basic/Module.cpp | 5 +-
clang/lib/Lex/ModuleMap.cpp | 8 +-
clang/lib/Lex/Preprocessor.cpp | 2 +-
clang/lib/Sema/SemaLookup.cpp | 3 +-
clang/lib/Sema/SemaModule.cpp | 4 +-
clang/lib/Serialization/ASTReader.cpp | 283 +++++++++---------
clang/lib/Serialization/ASTWriter.cpp | 72 +++--
13 files changed, 306 insertions(+), 217 deletions(-)
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index f83319db082d7..cadad5bf90e17 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -48,9 +48,17 @@ namespace clang {
class FileManager;
class LangOptions;
+class Module;
class ModuleMap;
class TargetInfo;
+/// Interface for on-demand deserialization of submodules stored in a PCM file.
+class ExternalSubmoduleSource {
+public:
+ virtual Module *getSubmodule(uint32_t GlobalID) = 0; // May return null.
+ virtual ~ExternalSubmoduleSource() = default;
+};
+
/// Describes the name of a module.
using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
@@ -222,6 +230,43 @@ struct ModuleAttributes {
NoUndeclaredIncludes(false) {}
};
+/// A reference to either a fully materialized Module object, or
+/// a yet-to-be-deserialized submodule in an AST file.
+class ModuleRef {
+ mutable Module *Existing = nullptr; // Cached by the const conversion below.
+ mutable ExternalSubmoduleSource *ExternalSource = nullptr; // Lazy source.
+ mutable uint64_t SubmoduleID = 0; // ID handed to ExternalSource.
+
+public:
+ ModuleRef() = default;
+ ModuleRef(Module *M) : Existing(M) {}
+ ModuleRef(ExternalSubmoduleSource *ExtSrc, uint64_t SubmoduleID)
+ : ExternalSource(ExtSrc), SubmoduleID(SubmoduleID) {}
+
+ Module *getExisting() const { return Existing; } // Never deserializes.
+ void setExisting(Module *E) { Existing = E; } // Keeps ExternalSource.
+
+ void setExternal(ExternalSubmoduleSource *ExtSrc, uint64_t ID) {
+ ExternalSource = ExtSrc;
+ SubmoduleID = ID;
+ }
+
+ operator bool() const { // Non-null test; does not deserialize.
+ return Existing || (ExternalSource && SubmoduleID);
+ }
+
+ operator Module *() const { // Asks ExternalSource first, if one is set.
+ if (ExternalSource) {
+ Existing = ExternalSource->getSubmodule(SubmoduleID);
+ ExternalSource = nullptr; // Later conversions just return Existing.
+ SubmoduleID = 0;
+ }
+ return Existing;
+ }
+
+ Module *operator->() const { return *this; } // Via operator Module *().
+};
+
/// Required to construct a Module.
///
/// This tag type is only constructible by ModuleMap, guaranteeing it ownership
@@ -348,7 +393,7 @@ class alignas(8) Module {
private:
/// The submodules of this module, indexed by name.
- std::vector<Module *> SubModules;
+ std::vector<ModuleRef> SubModules;
/// A mapping from the submodule name to the index into the
/// \c SubModules vector at which that submodule resides.
@@ -552,17 +597,17 @@ class alignas(8) Module {
/// The set of modules imported by this module, and on which this
/// module depends.
- llvm::SmallSetVector<Module *, 2> Imports;
+ llvm::SmallVector<ModuleRef, 2> Imports;
/// The set of top-level modules that affected the compilation of this module,
/// but were not imported.
- llvm::SmallSetVector<Module *, 2> AffectingClangModules;
+ llvm::SmallVector<ModuleRef, 2> AffectingClangModules;
/// Describes an exported module.
///
/// The pointer is the module being re-exported, while the bit will be true
/// to indicate that this is a wildcard export.
- using ExportDecl = std::pair<Module *, bool>;
+ using ExportDecl = std::pair<ModuleRef, bool>;
/// The set of export declarations.
SmallVector<ExportDecl, 2> Exports;
@@ -640,7 +685,7 @@ class alignas(8) Module {
/// A conflict between two modules.
struct Conflict {
/// The module that this module conflicts with.
- Module *Other;
+ ModuleRef Other;
/// The message provided to the user when there is a conflict.
std::string Message;
@@ -742,6 +787,23 @@ class alignas(8) Module {
Parent->SubModules.push_back(this);
}
+ /// Register \p Submodule as the materialized child named \p Name.
+ void addSubmodule(StringRef Name, Module *Submodule) {
+ const auto Result = SubModuleIndex.insert({Name, SubModules.size()});
+ if (Result.second)
+ SubModules.push_back(ModuleRef());
+ SubModules[Result.first->second].setExisting(Submodule);
+ }
+
+ /// Attach the lazily deserialized (external) half of a child's ModuleRef.
+ void addSubmodule(StringRef Name, ExternalSubmoduleSource *ExternalSource,
+ uint64_t SubmoduleID) {
+ const auto Result = SubModuleIndex.insert({Name, SubModules.size()});
+ if (Result.second)
+ SubModules.push_back(ModuleRef());
+ SubModules[Result.first->second].setExternal(ExternalSource, SubmoduleID);
+ }
+
/// Is this module have similar semantics as headers.
bool isHeaderLikeModule() const {
return isModuleMapModule() || isHeaderUnit();
@@ -913,7 +975,7 @@ class alignas(8) Module {
/// Find the submodule with the given name.
///
/// \returns The submodule if found, or NULL otherwise.
- Module *findSubmodule(StringRef Name) const;
+ ModuleRef findSubmodule(StringRef Name) const;
/// Get the Global Module Fragment (sub-module) for this module, it there is
/// one.
@@ -941,8 +1003,8 @@ class alignas(8) Module {
unsigned getVisibilityID() const { return VisibilityID; }
- using submodule_iterator = std::vector<Module *>::iterator;
- using submodule_const_iterator = std::vector<Module *>::const_iterator;
+ using submodule_iterator = std::vector<ModuleRef>::iterator;
+ using submodule_const_iterator = std::vector<ModuleRef>::const_iterator;
llvm::iterator_range<submodule_iterator> submodules() {
return llvm::make_range(SubModules.begin(), SubModules.end());
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index ed326a7fd545b..12f8dbb0b6090 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -548,7 +548,7 @@ class ModuleMap {
/// null, we will look for a top-level module.
///
/// \returns The named submodule, if known; otherwose, returns null.
- Module *lookupModuleQualified(StringRef Name, Module *Context) const;
+ ModuleRef lookupModuleQualified(StringRef Name, Module *Context) const;
/// Find a new module or submodule, or create it if it does not already
/// exist.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 8830294ea1658..8cba21539e48a 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1535,7 +1535,7 @@ class Preprocessor {
assert(M->isModuleMapModule());
if (!BuildingSubmoduleStack.empty()) {
if (M != BuildingSubmoduleStack.back().M)
- BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
+ BuildingSubmoduleStack.back().M->AffectingClangModules.push_back(M);
} else {
AffectingClangModules.insert(M);
}
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 9a41f9e89df98..3c8f3ba59a07e 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -44,7 +44,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
-const unsigned VERSION_MAJOR = 37;
+const unsigned VERSION_MAJOR = 38;
/// AST file minor version number supported by this version of
/// Clang.
@@ -751,6 +751,10 @@ enum ASTRecordTypes {
/// Record code for extname-redefined undeclared identifiers.
EXTNAME_UNDECLARED_IDENTIFIERS = 79,
+
+ /// Record with the submodule count, base ID, and top-level submodule ID for
+ /// this AST file, plus a blob of per-submodule relative bit offsets.
+ SUBMODULE_METADATA = 80,
};
/// Record types used within a source manager block.
@@ -819,8 +823,8 @@ enum PreprocessorDetailRecordTypes {
/// Record types used within a submodule description block.
enum SubmoduleRecordTypes {
- /// Metadata for submodules as a whole.
- SUBMODULE_METADATA = 0,
+ /// Defines the end of a single submodule. Sentinel record without any data.
+ SUBMODULE_END = 0,
/// Defines the major attributes of a submodule, including its
/// name and parent.
@@ -884,6 +888,10 @@ enum SubmoduleRecordTypes {
/// Specifies affecting modules that were not imported.
SUBMODULE_AFFECTING_MODULES = 18,
+
+ /// Specifies a direct submodule by name and ID, enabling on-demand
+ /// deserialization of children without loading the entire submodule block.
+ SUBMODULE_CHILD = 19,
};
/// Record types used within a comments block.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 8394647885bd3..7ca79f01937fe 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -424,7 +424,8 @@ class ASTReader
public ExternalHeaderFileInfoSource,
public ExternalSemaSource,
public IdentifierInfoLookup,
- public ExternalSLocEntrySource
+ public ExternalSLocEntrySource,
+ public ExternalSubmoduleSource
{
public:
/// Types of AST files.
@@ -820,32 +821,6 @@ class ASTReader
/// declarations in that submodule that could be made visible.
HiddenNamesMapType HiddenNamesMap;
- /// A module import, export, or conflict that hasn't yet been resolved.
- struct UnresolvedModuleRef {
- /// The file in which this module resides.
- ModuleFile *File;
-
- /// The module that is importing or exporting.
- Module *Mod;
-
- /// The kind of module reference.
- enum { Import, Export, Conflict, Affecting } Kind;
-
- /// The local ID of the module that is being exported.
- unsigned ID;
-
- /// Whether this is a wildcard export.
- LLVM_PREFERRED_TYPE(bool)
- unsigned IsWildcard : 1;
-
- /// String data.
- StringRef String;
- };
-
- /// The set of module imports and exports that still need to be
- /// resolved.
- SmallVector<UnresolvedModuleRef, 2> UnresolvedModuleRefs;
-
/// A vector containing selectors that have already been loaded.
///
/// This vector is indexed by the Selector ID (-1). NULL selector
@@ -1612,8 +1587,6 @@ class ASTReader
ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities);
- llvm::Error ReadSubmoduleBlock(ModuleFile &F,
- unsigned ClientLoadCapabilities);
static bool ParseLanguageOptions(const RecordData &Record,
StringRef ModuleFilename, bool Complain,
ASTReaderListener &Listener,
@@ -2444,8 +2417,7 @@ class ASTReader
unsigned LocalID) const;
/// Retrieve the submodule that corresponds to a global submodule ID.
- ///
- Module *getSubmodule(serialization::SubmoduleID GlobalID);
+ Module *getSubmodule(uint32_t GlobalID) override;
/// Retrieve the module that corresponds to the given module ID.
///
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 58f2fcba01e67..6c47040fde093 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -447,9 +447,25 @@ class ModuleFile {
/// Base submodule ID for submodules local to this module.
serialization::SubmoduleID BaseSubmoduleID = 0;
+ /// Base submodule ID for submodules local to this module within its own
+ /// address space.
+ unsigned LocalBaseSubmoduleID = 0;
+
+ /// Local submodule ID of the top-level module.
+ unsigned LocalTopLevelSubmoduleID = 0;
+
/// Remapping table for submodule IDs in this module.
ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
+ /// The cursor to the start of the submodules block.
+ llvm::BitstreamCursor SubmodulesCursor;
+
+ /// Absolute bit offset that the entries of SubmoduleOffsets are relative to.
+ uint64_t SubmodulesOffsetBase = 0;
+
+ /// Relative offsets for all submodule entries in the AST file.
+ const llvm::support::unaligned_uint64_t *SubmoduleOffsets = nullptr;
+
// === Selectors ===
/// The number of selectors new to this file.
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index 66629baa6240b..d27abb1153c72 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -53,8 +53,7 @@ Module::Module(ModuleConstructorTag, StringRef Name,
NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
- Parent->SubModuleIndex[Name] = Parent->SubModules.size();
- Parent->SubModules.push_back(this);
+ Parent->addSubmodule(Name, this);
}
}
@@ -348,7 +347,7 @@ void Module::markUnavailable(bool Unimportable) {
}
}
-Module *Module::findSubmodule(StringRef Name) const {
+ModuleRef Module::findSubmodule(StringRef Name) const {
if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
return SubModules[It->second];
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 71d8bef278179..6e2f7e0761c8e 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -938,7 +938,7 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
return findModule(Name);
}
-Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
+ModuleRef ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
if (!Context)
return findModule(Name);
@@ -950,8 +950,8 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
bool IsFramework,
bool IsExplicit) {
// Try to find an existing module with this name.
- if (Module *Sub = lookupModuleQualified(Name, Parent))
- return std::make_pair(Sub, false);
+ if (ModuleRef Sub = lookupModuleQualified(Name, Parent); Sub.getExisting())
+ return std::make_pair(Sub.getExisting(), false);
// Create a new module with this name.
Module *M = createModule(Name, Parent, IsFramework, IsExplicit);
@@ -960,7 +960,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
Module *ModuleMap::createModule(StringRef Name, Module *Parent,
bool IsFramework, bool IsExplicit) {
- assert(lookupModuleQualified(Name, Parent) == nullptr &&
+ assert(!lookupModuleQualified(Name, Parent).getExisting() &&
"Creating duplicate submodule");
Module *Result = new (ModulesAlloc.Allocate())
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index b08459632aacb..761bf8e9af56b 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1450,7 +1450,7 @@ void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
// Add this module to the imports list of the currently-built submodule.
if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
- BuildingSubmoduleStack.back().M->Imports.insert(M);
+ BuildingSubmoduleStack.back().M->Imports.push_back(M);
}
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index b96065f8619d2..e4e55bb7d0ac7 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -2032,7 +2032,8 @@ bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
// Directly imported module are necessarily reachable.
// Since we can't export import a module implementation partition unit, we
// don't need to count for Exports here.
- if (CurrentM && CurrentM->getTopLevelModule()->Imports.count(DeclTopModule))
+ if (CurrentM &&
+ llvm::is_contained(CurrentM->getTopLevelModule()->Imports, DeclTopModule))
return true;
// Then we treat all module implementation partition unit as unreachable.
diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 67f46b64cf047..caa61a99a6914 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -483,7 +483,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
// Sequence initialization of the imported module before that of the current
// module, if any.
Context.addModuleInitializer(ModuleScopes.back().Module, Import);
- Mod->Imports.insert(Interface); // As if we imported it.
+ Mod->Imports.push_back(Interface); // As if we imported it.
// Also save this as a shortcut to checking for decls in the interface
ThePrimaryInterface = Interface;
// If we made an implicit import of the module interface, then return the
@@ -710,7 +710,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
if (ExportLoc.isValid() || getEnclosingExportDecl(Import))
getCurrentModule()->Exports.emplace_back(Mod, false);
else
- getCurrentModule()->Imports.insert(Mod);
+ getCurrentModule()->Imports.push_back(Mod);
}
HadImportedNamedModules = true;
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 2c0b908314fa5..41eca76d36d82 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3746,8 +3746,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
case SUBMODULE_BLOCK_ID:
- if (llvm::Error Err = ReadSubmoduleBlock(F, ClientLoadCapabilities))
+ F.SubmodulesCursor = Stream;
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
+ if (llvm::Error Err =
+ ReadBlockAbbrevs(F.SubmodulesCursor, SUBMODULE_BLOCK_ID))
return Err;
+ F.SubmodulesOffsetBase = F.SubmodulesCursor.GetCurrentBitNo();
break;
case COMMENTS_BLOCK_ID: {
@@ -3799,6 +3804,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
case HEADER_SEARCH_TABLE:
case IMPORTED_MODULES:
case MACRO_OFFSET:
+ case SUBMODULE_METADATA:
break;
default:
continue;
@@ -3809,6 +3815,49 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
default: // Default behavior: ignore.
break;
+ case SUBMODULE_METADATA: {
+ F.BaseSubmoduleID = getTotalNumSubmodules();
+ F.LocalNumSubmodules = Record[0];
+ F.LocalBaseSubmoduleID = Record[1];
+ F.LocalTopLevelSubmoduleID = Record[2];
+ F.SubmoduleOffsets =
+ (const llvm::support::unaligned_uint64_t *)Blob.data();
+ if (F.LocalNumSubmodules > 0) {
+ // Introduce the global -> local mapping for submodules within this
+ // module.
+ GlobalSubmoduleMap.insert(
+ std::make_pair(getTotalNumSubmodules() + 1, &F));
+
+ // Introduce the local -> global mapping for submodules within this
+ // module.
+ F.SubmoduleRemap.insertOrReplace(std::make_pair(
+ F.LocalBaseSubmoduleID, F.BaseSubmoduleID - F.LocalBaseSubmoduleID));
+
+ SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
+ }
+
+ auto ReadSubmodule = [&](unsigned LocalID) -> Module * {
+ return getSubmodule(getGlobalSubmoduleID(F, LocalID));
+ };
+
+ if (PP.getHeaderSearchInfo().getModuleMap().findModule(F.ModuleName)) {
+ // If we already knew about this module, make sure to bring all
+ // submodules up to date.
+ for (unsigned Index = 0; Index != F.LocalNumSubmodules; ++Index) {
+ unsigned LocalID =
+ Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS;
+ ReadSubmodule(LocalID);
+ }
+ } else {
+ // If we didn't know this module, we loaded it transitively. Deserialize
+ // just the top-level module to register it with ModuleMap, but load the
+ // rest lazily.
+ ReadSubmodule(F.LocalTopLevelSubmoduleID);
+ }
+
+ break;
+ }
+
case TYPE_OFFSET: {
if (F.LocalNumTypes != 0)
return llvm::createStringError(
@@ -5087,41 +5136,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(ModuleFileName FileName,
F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
}
- // Resolve any unresolved module exports.
- for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
- UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
- SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID);
- Module *ResolvedMod = getSubmodule(GlobalID);
-
- switch (Unresolved.Kind) {
- case UnresolvedModuleRef::Conflict:
- if (ResolvedMod) {
- Module::Conflict Conflict;
- Conflict.Other = ResolvedMod;
- Conflict.Message = Unresolved.String.str();
- Unresolved.Mod->Conflicts.push_back(Conflict);
- }
- continue;
-
- case UnresolvedModuleRef::Import:
- if (ResolvedMod)
- Unresolved.Mod->Imports.insert(ResolvedMod);
- continue;
-
- case UnresolvedModuleRef::Affecting:
- if (ResolvedMod)
- Unresolved.Mod->AffectingClangModules.insert(ResolvedMod);
- continue;
-
- case UnresolvedModuleRef::Export:
- if (ResolvedMod || Unresolved.IsWildcard)
- Unresolved.Mod->Exports.push_back(Module::ExportDecl(
- ResolvedMod, static_cast<bool>(Unresolved.IsWildcard)));
- continue;
- }
- }
- UnresolvedModuleRefs.clear();
-
// FIXME: How do we load the 'use'd modules? They may not be submodules.
// Might be unnecessary as use declarations are only used to build the
// module itself.
@@ -6277,11 +6291,34 @@ bool ASTReader::isAcceptableASTFile(
/*ValidateDiagnosticOptions=*/true);
}
-llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
- unsigned ClientLoadCapabilities) {
- // Enter the submodule block.
- if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID))
- return Err;
+Module *ASTReader::getSubmodule(uint32_t GlobalID) {
+ if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
+ assert(GlobalID == 0 && "Unhandled global submodule ID");
+ return nullptr; // ID 0 means no submodule.
+ }
+
+ if (GlobalID > SubmodulesLoaded.size()) {
+ Error("submodule ID out of range in AST file");
+ return nullptr;
+ }
+
+ SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
+ if (GlobalIndex < SubmodulesLoaded.size() && SubmodulesLoaded[GlobalIndex])
+ return SubmodulesLoaded[GlobalIndex]; // Already deserialized.
+
+ GlobalSubmoduleMapType::iterator It = GlobalSubmoduleMap.find(GlobalID);
+ assert(It != GlobalSubmoduleMap.end());
+ ModuleFile &F = *It->second; // File that owns this submodule ID.
+ unsigned Index = GlobalID - F.BaseSubmoduleID - NUM_PREDEF_SUBMODULE_IDS;
+ unsigned LocalID = Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS;
+
+ BitstreamCursor &Cursor = F.SubmodulesCursor;
+ SavedStreamPosition SavedPosition(Cursor);
+ uint64_t Offset = F.SubmoduleOffsets[Index]; // Bits past the offset base.
+ if (llvm::Error Err = Cursor.JumpToBit(F.SubmodulesOffsetBase + Offset)) {
+ Error(std::move(Err));
+ return nullptr;
+ }
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
@@ -6292,23 +6329,24 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
? &ModuleMap::createModule
: &ModuleMap::findOrCreateModuleFirst;
- bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
while (true) {
- Expected<llvm::BitstreamEntry> MaybeEntry =
- F.Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+ if (!MaybeEntry) {
+ Error(MaybeEntry.takeError());
+ return nullptr;
+ }
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
- case llvm::BitstreamEntry::SubBlock: // Handled for us already.
+ case llvm::BitstreamEntry::SubBlock:
case llvm::BitstreamEntry::Error:
- return llvm::createStringError(std::errc::illegal_byte_sequence,
- "malformed block record in AST file");
- case llvm::BitstreamEntry::EndBlock:
- return llvm::Error::success();
+ case llvm::BitstreamEntry::EndBlock: {
+ Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed block record in AST file"));
+ return nullptr;
+ }
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
@@ -6317,35 +6355,35 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
// Read a record.
StringRef Blob;
Record.clear();
- Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeKind)
- return MaybeKind.takeError();
- unsigned Kind = MaybeKind.get();
-
- if ((Kind == SUBMODULE_METADATA) != First)
- return llvm::createStringError(
- std::errc::illegal_byte_sequence,
- "submodule metadata record should be at beginning of block");
- First = false;
-
- // Submodule information is only valid if we have a current module.
- // FIXME: Should we error on these cases?
- if (!CurrentModule && Kind != SUBMODULE_METADATA &&
- Kind != SUBMODULE_DEFINITION)
- continue;
+ Expected<unsigned> MaybeKind = Cursor.readRecord(Entry.ID, Record, &Blob);
+ if (!MaybeKind) {
+ Error(MaybeKind.takeError());
+ return nullptr;
+ }
+ auto Kind = static_cast<SubmoduleRecordTypes>(MaybeKind.get());
switch (Kind) {
- default: // Default behavior: ignore.
- break;
+ case SUBMODULE_END:
+ if (!CurrentModule) {
+ Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed module definition"));
+ return nullptr;
+ }
+ return CurrentModule;
case SUBMODULE_DEFINITION: {
- if (Record.size() < 13)
- return llvm::createStringError(std::errc::illegal_byte_sequence,
- "malformed module definition");
+ if (Record.size() < 13) {
+ Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed module definition"));
+ return nullptr;
+ }
StringRef Name = Blob;
unsigned Idx = 0;
- SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
+ unsigned ReadLocalID = Record[Idx++];
+ assert(LocalID == ReadLocalID);
+ SubmoduleID ReadGlobalID = getGlobalSubmoduleID(F, ReadLocalID);
+ assert(GlobalID == ReadGlobalID);
SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
@@ -6362,18 +6400,15 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
bool NamedModuleHasInit = Record[Idx++];
Module *ParentModule = nullptr;
- if (Parent)
+ if (Parent) {
ParentModule = getSubmodule(Parent);
+ if (!ParentModule)
+ return nullptr;
+ }
CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
IsFramework, IsExplicit);
- SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
- if (GlobalIndex >= SubmodulesLoaded.size() ||
- SubmodulesLoaded[GlobalIndex])
- return llvm::createStringError(std::errc::invalid_argument,
- "too many submodules");
-
if (!ParentModule) {
if ([[maybe_unused]] const ModuleFileKey *CurFileKey =
CurrentModule->getASTFileKey()) {
@@ -6394,7 +6429,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
Diag(diag::note_module_file_conflict)
<< CurModMapFile->getName() << ModMapFile->getName();
- return llvm::make_error<AlreadyReportedDiagnosticError>();
+ return nullptr;
}
}
@@ -6504,59 +6539,29 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
break;
}
- case SUBMODULE_METADATA: {
- F.BaseSubmoduleID = getTotalNumSubmodules();
- F.LocalNumSubmodules = Record[0];
- unsigned LocalBaseSubmoduleID = Record[1];
- if (F.LocalNumSubmodules > 0) {
- // Introduce the global -> local mapping for submodules within this
- // module.
- GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F));
-
- // Introduce the local -> global mapping for submodules within this
- // module.
- F.SubmoduleRemap.insertOrReplace(
- std::make_pair(LocalBaseSubmoduleID,
- F.BaseSubmoduleID - LocalBaseSubmoduleID));
-
- SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
- }
- break;
- }
-
case SUBMODULE_IMPORTS:
for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
- UnresolvedModuleRef Unresolved;
- Unresolved.File = &F;
- Unresolved.Mod = CurrentModule;
- Unresolved.ID = Record[Idx];
- Unresolved.Kind = UnresolvedModuleRef::Import;
- Unresolved.IsWildcard = false;
- UnresolvedModuleRefs.push_back(Unresolved);
+ SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+ CurrentModule->Imports.push_back(ModuleRef(this, GlobalID));
}
break;
case SUBMODULE_AFFECTING_MODULES:
for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
- UnresolvedModuleRef Unresolved;
- Unresolved.File = &F;
- Unresolved.Mod = CurrentModule;
- Unresolved.ID = Record[Idx];
- Unresolved.Kind = UnresolvedModuleRef::Affecting;
- Unresolved.IsWildcard = false;
- UnresolvedModuleRefs.push_back(Unresolved);
+ SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+ CurrentModule->AffectingClangModules.push_back(
+ ModuleRef(this, GlobalID));
}
break;
case SUBMODULE_EXPORTS:
for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) {
- UnresolvedModuleRef Unresolved;
- Unresolved.File = &F;
- Unresolved.Mod = CurrentModule;
- Unresolved.ID = Record[Idx];
- Unresolved.Kind = UnresolvedModuleRef::Export;
- Unresolved.IsWildcard = Record[Idx + 1];
- UnresolvedModuleRefs.push_back(Unresolved);
+ SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+ bool IsWildcard = Record[Idx + 1];
+ ModuleRef ExportedMod =
+ GlobalID ? ModuleRef(this, GlobalID) : ModuleRef();
+ if (ExportedMod || IsWildcard)
+ CurrentModule->Exports.push_back({ExportedMod, IsWildcard});
}
// Once we've loaded the set of exports, there's no reason to keep
@@ -6580,14 +6585,11 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
break;
case SUBMODULE_CONFLICT: {
- UnresolvedModuleRef Unresolved;
- Unresolved.File = &F;
- Unresolved.Mod = CurrentModule;
- Unresolved.ID = Record[0];
- Unresolved.Kind = UnresolvedModuleRef::Conflict;
- Unresolved.IsWildcard = false;
- Unresolved.String = Blob;
- UnresolvedModuleRefs.push_back(Unresolved);
+ SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]);
+ Module::Conflict Conflict;
+ Conflict.Other = ModuleRef(this, GlobalID);
+ Conflict.Message = Blob.str();
+ CurrentModule->Conflicts.push_back(Conflict);
break;
}
@@ -6608,6 +6610,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
CurrentModule->ExportAsModule = Blob.str();
ModMap.addLinkAsDependency(CurrentModule);
break;
+
+ case SUBMODULE_CHILD: {
+ // Record a not-yet-loaded direct child for on-demand deserialization.
+ SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]);
+ CurrentModule->addSubmodule(Blob, this, GlobalID);
+ break;
+ }
}
}
}
@@ -10045,20 +10054,6 @@ ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const {
return LocalID + I->second;
}
-Module *ASTReader::getSubmodule(SubmoduleID GlobalID) {
- if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
- assert(GlobalID == 0 && "Unhandled global submodule ID");
- return nullptr;
- }
-
- if (GlobalID > SubmodulesLoaded.size()) {
- Error("submodule ID out of range in AST file");
- return nullptr;
- }
-
- return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS];
-}
-
Module *ASTReader::getModule(unsigned ID) {
return getSubmodule(ID);
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ba644fefc109a..fc24c57768092 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -919,6 +919,7 @@ void ASTWriter::WriteBlockInfoBlock() {
// AST Top-Level Block.
BLOCK(AST_BLOCK);
+ RECORD(SUBMODULE_METADATA);
RECORD(TYPE_OFFSET);
RECORD(DECL_OFFSET);
RECORD(IDENTIFIER_OFFSET);
@@ -997,7 +998,7 @@ void ASTWriter::WriteBlockInfoBlock() {
// Submodule Block.
BLOCK(SUBMODULE_BLOCK);
- RECORD(SUBMODULE_METADATA);
+ RECORD(SUBMODULE_END);
RECORD(SUBMODULE_DEFINITION);
RECORD(SUBMODULE_UMBRELLA_HEADER);
RECORD(SUBMODULE_HEADER);
@@ -1016,6 +1017,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER);
RECORD(SUBMODULE_INITIALIZERS);
RECORD(SUBMODULE_EXPORT_AS);
+ RECORD(SUBMODULE_CHILD);
// Comments Block.
BLOCK(COMMENTS_BLOCK);
@@ -2983,16 +2985,6 @@ unsigned ASTWriter::getSubmoduleID(Module *Mod) {
return ID;
}
-/// Compute the number of modules within the given tree (including the
-/// given module).
-static unsigned getNumberOfModules(Module *Mod) {
- unsigned ChildModules = 0;
- for (Module *Submodule : Mod->submodules())
- ChildModules += getNumberOfModules(Submodule);
-
- return ChildModules + 1;
-}
-
void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
// Enter the submodule description block.
Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
@@ -3088,11 +3080,16 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name
unsigned ExportAsAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
- // Write the submodule metadata block.
- RecordData::value_type Record[] = {
- getNumberOfModules(WritingModule),
- FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS};
- Stream.EmitRecord(SUBMODULE_METADATA, Record);
+ Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CHILD));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Child submodule ID
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Child name
+ unsigned ChildAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+ SmallVector<uint64_t> SubmoduleOffsets;
+ uint64_t SubmoduleOffsetBase = Stream.GetCurrentBitNo();
+
+ unsigned TopLevelID = getSubmoduleID(WritingModule);
// Write all of the submodules.
std::queue<Module *> Q;
@@ -3101,6 +3098,19 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
Module *Mod = Q.front();
Q.pop();
unsigned ID = getSubmoduleID(Mod);
+ if (ID < FirstSubmoduleID) {
+ assert(0 && "Loaded submodule entered WritingModule ?");
+ continue;
+ }
+
+ // Record the local offset of this submodule.
+ unsigned Index = ID - FirstSubmoduleID;
+ if (Index >= SubmoduleOffsets.size())
+ SubmoduleOffsets.resize(Index + 1);
+
+ uint64_t Offset = Stream.GetCurrentBitNo() - SubmoduleOffsetBase;
+ assert((Offset >> 32) == 0 && "Submodule offset too large");
+ SubmoduleOffsets[Index] = Offset;
uint64_t ParentID = 0;
if (Mod->Parent) {
@@ -3259,6 +3269,19 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
Stream.EmitRecordWithBlob(ExportAsAbbrev, Record, Mod->ExportAsModule);
}
+ // Emit one SUBMODULE_CHILD record per direct child so the reader can
+ // populate PendingSubmodules and demand-load children by name.
+ for (Module *Child : Mod->submodules()) {
+ RecordData::value_type Record[] = {SUBMODULE_CHILD, getSubmoduleID(Child)};
+ Stream.EmitRecordWithBlob(ChildAbbrev, Record, Child->Name);
+ }
+
+ // Emit the sentinel signifying the end of this submodule.
+ {
+ RecordData Record;
+ Stream.EmitRecord(SUBMODULE_END, Record);
+ }
+
// Queue up the submodules of this module.
for (Module *M : Mod->submodules())
Q.push(M);
@@ -3266,10 +3289,23 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
Stream.ExitBlock();
- assert((NextSubmoduleID - FirstSubmoduleID ==
- getNumberOfModules(WritingModule)) &&
+ assert((NextSubmoduleID - FirstSubmoduleID == SubmoduleOffsets.size()) &&
"Wrong # of submodules; found a reference to a non-local, "
"non-imported submodule?");
+
+ Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_METADATA));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Submodule count
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Base submodule ID
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Top-level submod ID
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Submodule offsets
+ unsigned SubmoduleMetadataAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+ RecordData::value_type Record[] = {
+ SUBMODULE_METADATA, SubmoduleOffsets.size(),
+ FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS, TopLevelID};
+ Stream.EmitRecordWithBlob(SubmoduleMetadataAbbrev, Record,
+ bytes(SubmoduleOffsets));
}
void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
>From 0d21c736431fca9d1dbf93f1eda799ce19972744 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 29 Apr 2026 15:59:06 -0700
Subject: [PATCH 2/2] git-clang-format
---
clang/include/clang/Basic/Module.h | 4 +---
clang/include/clang/Serialization/ASTReader.h | 16 +++++++---------
clang/lib/Lex/ModuleMap.cpp | 3 ++-
clang/lib/Serialization/ASTReader.cpp | 5 +++--
clang/lib/Serialization/ASTWriter.cpp | 3 ++-
5 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index cadad5bf90e17..14200b50569c9 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -251,9 +251,7 @@ class ModuleRef {
SubmoduleID = ID;
}
- operator bool() const {
- return Existing || (ExternalSource && SubmoduleID);
- }
+ operator bool() const { return Existing || (ExternalSource && SubmoduleID); }
operator Module *() const {
if (ExternalSource) {
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 7ca79f01937fe..bedac9f8a540a 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -418,15 +418,13 @@ struct LookupBlockOffsets : VisibleLookupBlockOffsets {
/// The AST reader provides lazy de-serialization of declarations, as
/// required when traversing the AST. Only those AST nodes that are
/// actually required will be de-serialized.
-class ASTReader
- : public ExternalPreprocessorSource,
- public ExternalPreprocessingRecordSource,
- public ExternalHeaderFileInfoSource,
- public ExternalSemaSource,
- public IdentifierInfoLookup,
- public ExternalSLocEntrySource,
- public ExternalSubmoduleSource
-{
+class ASTReader : public ExternalPreprocessorSource,
+ public ExternalPreprocessingRecordSource,
+ public ExternalHeaderFileInfoSource,
+ public ExternalSemaSource,
+ public IdentifierInfoLookup,
+ public ExternalSLocEntrySource,
+ public ExternalSubmoduleSource {
public:
/// Types of AST files.
friend class ASTDeclMerger;
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 6e2f7e0761c8e..e771a4b50111d 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -938,7 +938,8 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
return findModule(Name);
}
-ModuleRef ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
+ModuleRef ModuleMap::lookupModuleQualified(StringRef Name,
+ Module *Context) const {
if (!Context)
return findModule(Name);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 41eca76d36d82..e44cee65ce517 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3830,8 +3830,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
// Introduce the local -> global mapping for submodules within this
// module.
- F.SubmoduleRemap.insertOrReplace(std::make_pair(
- F.LocalBaseSubmoduleID, F.BaseSubmoduleID - F.LocalBaseSubmoduleID));
+ F.SubmoduleRemap.insertOrReplace(
+ std::make_pair(F.LocalBaseSubmoduleID,
+ F.BaseSubmoduleID - F.LocalBaseSubmoduleID));
SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index fc24c57768092..1970ed86589b5 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3272,7 +3272,8 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
// Emit one SUBMODULE_CHILD record per direct child so the reader can
// populate PendingSubmodules and demand-load children by name.
for (Module *Child : Mod->submodules()) {
- RecordData::value_type Record[] = {SUBMODULE_CHILD, getSubmoduleID(Child)};
+ RecordData::value_type Record[] = {SUBMODULE_CHILD,
+ getSubmoduleID(Child)};
Stream.EmitRecordWithBlob(ChildAbbrev, Record, Child->Name);
}
More information about the cfe-commits mailing list