[clang] [clang][modules] Track included files per submodule (PR #71117)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 2 14:36:00 PDT 2023
https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/71117
None
>From 9debc58d5135fbde51967dfb076d0ec5d954df38 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 2 Nov 2023 14:35:30 -0700
Subject: [PATCH] [clang][modules] Track included files per submodule
---
clang/include/clang/Basic/Module.h | 2 +
clang/include/clang/Lex/Preprocessor.h | 39 +++++++++++---
clang/lib/Serialization/ASTReader.cpp | 52 ++++++++++---------
clang/lib/Serialization/ASTReaderInternals.h | 3 +-
clang/lib/Serialization/ASTWriter.cpp | 39 +++++++++++---
clang/test/Modules/per-submodule-includes.m | 53 ++++++++++++++++++++
6 files changed, 149 insertions(+), 39 deletions(-)
create mode 100644 clang/test/Modules/per-submodule-includes.m
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 6a7423938bdb8fa..41f3b4fb46423d6 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -423,6 +423,8 @@ class alignas(8) Module {
/// to import but didn't because they are not direct uses.
llvm::SmallSetVector<const Module *, 2> UndeclaredUses;
+ llvm::DenseSet<const FileEntry *> Includes;
+
/// A library or framework to link against when an entity from this
/// module is used.
struct LinkLibrary {
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4a99447e757c6ac..fef608efdee09d9 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -982,6 +982,8 @@ class Preprocessor {
/// The set of modules that are visible within the submodule.
VisibleModuleSet VisibleModules;
+ /// The files that have been included.
+ IncludedFilesSet IncludedFiles;
// FIXME: CounterValue?
// FIXME: PragmaPushMacroInfo?
};
@@ -994,8 +996,8 @@ class Preprocessor {
/// in a submodule.
SubmoduleState *CurSubmoduleState;
- /// The files that have been included.
- IncludedFilesSet IncludedFiles;
+ /// The files that have been included outside of (sub)modules.
+ IncludedFilesSet Includes;
/// The set of top-level modules that affected preprocessing, but were not
/// imported.
@@ -1484,19 +1486,40 @@ class Preprocessor {
/// Mark the file as included.
/// Returns true if this is the first time the file was included.
bool markIncluded(FileEntryRef File) {
- HeaderInfo.getFileInfo(File);
- return IncludedFiles.insert(File).second;
+ bool AlreadyIncluded = alreadyIncluded(File);
+ CurSubmoduleState->IncludedFiles.insert(File);
+ if (!BuildingSubmoduleStack.empty())
+ BuildingSubmoduleStack.back().M->Includes.insert(File);
+ else if (Module *M = getCurrentModule())
+ M->Includes.insert(File);
+ else
+ Includes.insert(File);
+ return !AlreadyIncluded;
}
/// Return true if this header has already been included.
bool alreadyIncluded(FileEntryRef File) const {
HeaderInfo.getFileInfo(File);
- return IncludedFiles.count(File);
+ if (CurSubmoduleState->IncludedFiles.contains(File))
+ return true;
+ // TODO: Do this more efficiently.
+ for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules())
+ if (CurSubmoduleState->VisibleModules.isVisible(M))
+ if (M->Includes.contains(File))
+ return true;
+ return false;
+ }
+
+ void markIncludedOnTopLevel(const FileEntry *File) {
+ Includes.insert(File);
+ CurSubmoduleState->IncludedFiles.insert(File);
+ }
+
+ void markIncludedInModule(Module *M, const FileEntry *File) {
+ M->Includes.insert(File);
}
- /// Get the set of included files.
- IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
- const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+ const IncludedFilesSet &getTopLevelIncludes() const { return Includes; }
/// Return the name of the macro defined before \p Loc that has
/// spelling \p Tokens. If there are multiple macros with same spelling,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 42b48d230af7a97..9c32dcf3d954761 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1975,19 +1975,15 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
return LocalID + I->second;
}
-const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
+OptionalFileEntryRefDegradesToFileEntryPtr
+HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
FileManager &FileMgr = Reader.getFileManager();
- if (!Key.Imported) {
- if (auto File = FileMgr.getFile(Key.Filename))
- return *File;
- return nullptr;
- }
+ if (!Key.Imported)
+ return FileMgr.getOptionalFileRef(Key.Filename);
std::string Resolved = std::string(Key.Filename);
Reader.ResolveImportedPath(M, Resolved);
- if (auto File = FileMgr.getFile(Resolved))
- return *File;
- return nullptr;
+ return FileMgr.getOptionalFileRef(Resolved);
}
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -2043,13 +2039,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
HeaderFileInfo HFI;
unsigned Flags = *d++;
- bool Included = (Flags >> 6) & 0x01;
- if (Included)
- if (const FileEntry *FE = getFile(key))
- // Not using \c Preprocessor::markIncluded(), since that would attempt to
- // deserialize this header file info again.
- Reader.getPreprocessor().getIncludedFiles().insert(FE);
-
// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
HFI.isImport |= (Flags >> 5) & 0x01;
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -2065,6 +2054,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
HFI.Framework = HS->getUniqueFrameworkName(FrameworkName);
}
+ OptionalFileEntryRefDegradesToFileEntryPtr FE = getFile(key);
+ Preprocessor &PP = Reader.getPreprocessor();
+ ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+
+ unsigned IncludedCount =
+ endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+ for (unsigned I = 0; I < IncludedCount; ++I) {
+ uint32_t LocalSMID =
+ endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+ if (!FE)
+ continue;
+
+ if (LocalSMID == 0) {
+ PP.markIncludedOnTopLevel(*FE);
+ } else {
+ SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
+ Module *Mod = Reader.getSubmodule(GlobalSMID);
+ PP.markIncludedInModule(Mod, *FE);
+ }
+ }
+
assert((End - d) % 4 == 0 &&
"Wrong data length in HeaderFileInfo deserialization");
while (d != End) {
@@ -2077,14 +2087,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
// implicit module import.
SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
Module *Mod = Reader.getSubmodule(GlobalSMID);
- FileManager &FileMgr = Reader.getFileManager();
- ModuleMap &ModMap =
- Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
-
- std::string Filename = std::string(key.Filename);
- if (key.Imported)
- Reader.ResolveImportedPath(M, Filename);
- if (auto FE = FileMgr.getOptionalFileRef(Filename)) {
+
+ if (FE) {
// FIXME: NameAsWritten
Module::Header H = {std::string(key.Filename), "", *FE};
ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
diff --git a/clang/lib/Serialization/ASTReaderInternals.h b/clang/lib/Serialization/ASTReaderInternals.h
index 25a46ddabcb7078..1abcaa9ca299d1d 100644
--- a/clang/lib/Serialization/ASTReaderInternals.h
+++ b/clang/lib/Serialization/ASTReaderInternals.h
@@ -278,7 +278,8 @@ class HeaderFileInfoTrait {
data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
private:
- const FileEntry *getFile(const internal_key_type &Key);
+ OptionalFileEntryRefDegradesToFileEntryPtr
+ getFile(const internal_key_type &Key);
};
/// The on-disk hash table used for known header files.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ab70594607530f4..a7fddaf8e243b95 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1842,7 +1842,7 @@ namespace {
struct data_type {
const HeaderFileInfo &HFI;
- bool AlreadyIncluded;
+ std::vector<const Module *> Includers;
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
UnresolvedModule Unresolved;
};
@@ -1862,6 +1862,12 @@ namespace {
EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
unsigned DataLen = 1 + 4 + 4;
+
+ DataLen += 4;
+ for (const Module *M : Data.Includers)
+ if (!M || Writer.getLocalOrImportedSubmoduleID(M))
+ DataLen += 4;
+
for (auto ModInfo : Data.KnownHeaders)
if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
DataLen += 4;
@@ -1888,8 +1894,7 @@ namespace {
endian::Writer LE(Out, llvm::endianness::little);
uint64_t Start = Out.tell(); (void)Start;
- unsigned char Flags = (Data.AlreadyIncluded << 6)
- | (Data.HFI.isImport << 5)
+ unsigned char Flags = (Data.HFI.isImport << 5)
| (Data.HFI.isPragmaOnce << 4)
| (Data.HFI.DirInfo << 1)
| Data.HFI.IndexHeaderMapHeader;
@@ -1916,6 +1921,14 @@ namespace {
}
LE.write<uint32_t>(Offset);
+ LE.write<uint32_t>(Data.Includers.size());
+ for (const Module *M : Data.Includers) {
+ if (!M)
+ LE.write<uint32_t>(0);
+ else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M))
+ LE.write<uint32_t>(ModID);
+ }
+
auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
uint32_t Value = (ModID << 3) | (unsigned)Role;
@@ -1990,7 +2003,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
HeaderFileInfoTrait::key_type Key = {
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
HeaderFileInfoTrait::data_type Data = {
- Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+ Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
// FIXME: Deal with cases where there are multiple unresolved header
// directives in different submodules for the same header.
Generator.insert(Key, Data, GeneratorTrait);
@@ -2033,13 +2046,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
SavedStrings.push_back(Filename.data());
}
- bool Included = PP->alreadyIncluded(*File);
+ std::vector<const Module *> Includers;
+ if (WritingModule) {
+ llvm::DenseSet<const Module *> Seen;
+ std::function<void(const Module *)> Visit = [&](const Module *M) {
+ if (!Seen.insert(M).second)
+ return;
+ if (M->Includes.contains(*File))
+ Includers.push_back(M);
+ for (const Module *SubM : M->submodules())
+ Visit(SubM);
+ };
+ Visit(WritingModule);
+ } else if (PP->getTopLevelIncludes().contains(*File)) {
+ Includers.push_back(nullptr);
+ }
HeaderFileInfoTrait::key_type Key = {
Filename, File->getSize(), getTimestampForOutput(*File)
};
HeaderFileInfoTrait::data_type Data = {
- *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
+ *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
};
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
diff --git a/clang/test/Modules/per-submodule-includes.m b/clang/test/Modules/per-submodule-includes.m
new file mode 100644
index 000000000000000..39fd255e8e93e92
--- /dev/null
+++ b/clang/test/Modules/per-submodule-includes.m
@@ -0,0 +1,53 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- frameworks/Textual.framework/Headers/Header.h
+static int symbol;
+
+//--- frameworks/FW.framework/Modules/module.modulemap
+framework module FW {
+ umbrella header "FW.h"
+ export *
+ module * { export * }
+}
+//--- frameworks/FW.framework/Headers/FW.h
+#import <FW/Sub1.h>
+#import <FW/Sub2.h>
+//--- frameworks/FW.framework/Headers/Sub1.h
+//--- frameworks/FW.framework/Headers/Sub2.h
+#import <Textual/Header.h>
+
+//--- pch.modulemap
+module __PCH {
+ header "pch.h"
+ export *
+}
+//--- pch.h
+#import <FW/Sub1.h>
+
+//--- tu.m
+#import <Textual/Header.h>
+int fn() { return symbol; }
+
+// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the
+// PCH is treated textually due to -fmodule-name=FW.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN: -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN: -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m
+
+// Compilation using the PCH as precompiled module fails. The import of FW/Sub1.h
+// in the PCH is translated to an import. Nothing is preventing that now that
+// -fmodule-name=FW has been replaced with -fmodule-name=__PCH.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
+// RUN: -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o %t/pch.h.pcm
+//
+// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in
+// FW.Sub2), so the TU does not import it again. It's contents remain invisible,
+// though.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
+// RUN: -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only %t/tu.m
More information about the cfe-commits
mailing list