[clang] [clang-tools-extra] [clang][modules] Shrink the size of `Module::Headers` (PR #113395)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 24 09:03:42 PDT 2024
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/113395
>From 09246d11c8663c0b2b31317eddc297c1d29fcd60 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Tue, 22 Oct 2024 16:07:27 -0700
Subject: [PATCH 1/3] [clang][modules] Shrink the size of `Module::Headers`
This patch shrinks the size of the `Module` class from 2112B to 1624B. I wasn't able to get a good data on the actual impact on memory usage, but given my `clang-scan-deps` workload at hand (with tens of thousands of instances), I think there should be some win here. This also speeds up my benchmark by under 0.1%.
---
clang/include/clang/Basic/Module.h | 18 ++++++++++++++++--
clang/lib/Basic/Module.cpp | 2 +-
clang/lib/Frontend/FrontendAction.cpp | 2 +-
clang/lib/Lex/ModuleMap.cpp | 20 +++++++++++---------
clang/lib/Serialization/ASTWriter.cpp | 4 ++--
5 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c5d33fbb562cc..b8f1e00d6fac1f 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -271,8 +271,22 @@ class alignas(8) Module {
DirectoryEntryRef Entry;
};
- /// The headers that are part of this module.
- SmallVector<Header, 2> Headers[5];
+private:
+ unsigned HeaderKindBeginIndex[6] = {};
+ SmallVector<Header, 2> HeadersStorage;
+
+public:
+ ArrayRef<Header> getHeaders(HeaderKind HK) const {
+ auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
+ auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+ return {BeginIt, EndIt};
+ }
+ void addHeader(HeaderKind HK, Header H) {
+ auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+ HeadersStorage.insert(EndIt, std::move(H));
+ for (unsigned HKI = HK + 1; HKI != 6; ++HKI)
+ ++HeaderKindBeginIndex[HKI];
+ }
/// Stored information about a header directive that was found in the
/// module map file but has not been resolved to a file.
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index ad52fccff5dc7f..a7a3f6b37efef1 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -528,7 +528,7 @@ void Module::print(raw_ostream &OS, unsigned Indent, bool Dump) const {
for (auto &K : Kinds) {
assert(&K == &Kinds[K.Kind] && "kinds in wrong order");
- for (auto &H : Headers[K.Kind]) {
+ for (auto &H : getHeaders(K.Kind)) {
OS.indent(Indent + 2);
OS << K.Prefix << "header \"";
OS.write_escaped(H.NameAsWritten);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 81eea9c4c4dc58..8264bd702fe43f 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes(
// Add includes for each of these headers.
for (auto HK : {Module::HK_Normal, Module::HK_Private}) {
- for (Module::Header &H : Module->Headers[HK]) {
+ for (const Module::Header &H : Module->getHeaders(HK)) {
Module->addTopHeader(H.Entry);
// Use the path as specified in the module map file. We'll look for this
// file relative to the module build directory (the directory containing
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index fd6bc17ae9cdac..23875912fa735e 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule,
// as obtained from the lookup and as obtained from the module.
// This check is not cheap, so enable it only for debugging.
bool IsPrivate = false;
- SmallVectorImpl<Module::Header> *HeaderList[] = {
- &Header.getModule()->Headers[Module::HK_Private],
- &Header.getModule()->Headers[Module::HK_PrivateTextual]};
- for (auto *Hs : HeaderList)
+ ArrayRef<Module::Header> HeaderList[] = {
+ Header.getModule()->getHeaders(Module::HK_Private),
+ Header.getModule()->getHeaders(Module::HK_PrivateTextual)};
+ for (auto Hs : HeaderList)
IsPrivate |= llvm::any_of(
- *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
+ Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
assert(IsPrivate && "inconsistent headers and roles");
}
#endif
@@ -1296,27 +1296,29 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
ModuleHeaderRole Role, bool Imported) {
KnownHeader KH(Mod, Role);
+ FileEntryRef HeaderEntry = Header.Entry;
+
// Only add each header to the headers list once.
// FIXME: Should we diagnose if a header is listed twice in the
// same module definition?
- auto &HeaderList = Headers[Header.Entry];
+ auto &HeaderList = Headers[HeaderEntry];
if (llvm::is_contained(HeaderList, KH))
return;
HeaderList.push_back(KH);
- Mod->Headers[headerRoleToKind(Role)].push_back(Header);
+ Mod->addHeader(headerRoleToKind(Role), std::move(Header));
bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts);
if (!Imported || isCompilingModuleHeader) {
// When we import HeaderFileInfo, the external source is expected to
// set the isModuleHeader flag itself.
- HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
+ HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role,
isCompilingModuleHeader);
}
// Notify callbacks that we just added a new header.
for (const auto &Cb : Callbacks)
- Cb->moduleMapAddHeader(Header.Entry.getName());
+ Cb->moduleMapAddHeader(HeaderEntry.getName());
}
FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 494890284d2f2c..b576822fa704c8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3070,9 +3070,9 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
Module::HK_PrivateTextual},
{SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded}
};
- for (auto &HL : HeaderLists) {
+ for (const auto &HL : HeaderLists) {
RecordData::value_type Record[] = {HL.RecordKind};
- for (auto &H : Mod->Headers[HL.HeaderKind])
+ for (const auto &H : Mod->getHeaders(HL.HeaderKind))
Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten);
}
>From 27572e7af40622d4ea48931f89018bffda4334c9 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 24 Oct 2024 08:43:17 -0700
Subject: [PATCH 2/3] Fix clang-tools-extra build
---
clang-tools-extra/modularize/CoverageChecker.cpp | 7 +++----
.../modularize/ModularizeUtilities.cpp | 14 +++-----------
clang/include/clang/Basic/Module.h | 1 +
3 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp
index 0e76c539aa3c83..b536ee00497c03 100644
--- a/clang-tools-extra/modularize/CoverageChecker.cpp
+++ b/clang-tools-extra/modularize/CoverageChecker.cpp
@@ -223,10 +223,9 @@ bool CoverageChecker::collectModuleHeaders(const Module &Mod) {
return false;
}
- for (auto &HeaderKind : Mod.Headers)
- for (auto &Header : HeaderKind)
- ModuleMapHeadersSet.insert(
- ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
+ for (const auto &Header : Mod.getAllHeaders())
+ ModuleMapHeadersSet.insert(
+ ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
for (auto *Submodule : Mod.submodules())
collectModuleHeaders(*Submodule);
diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp
index b202b3aae8f8a3..476e13770a94f6 100644
--- a/clang-tools-extra/modularize/ModularizeUtilities.cpp
+++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp
@@ -358,7 +358,7 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
} else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
Mod.getUmbrellaDirAsWritten()) {
// If there normal headers, assume these are umbrellas and skip collection.
- if (Mod.Headers->size() == 0) {
+ if (Mod.getHeaders(Module::HK_Normal).empty()) {
// Collect headers in umbrella directory.
if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(),
UmbrellaDependents))
@@ -371,16 +371,8 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
// modules or because they are meant to be included by another header,
// and thus should be ignored by modularize.
- int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
-
- for (int Index = 0; Index < NormalHeaderCount; ++Index) {
- DependentsVector NormalDependents;
- // Collect normal header.
- const clang::Module::Header &Header(
- Mod.Headers[clang::Module::HK_Normal][Index]);
- std::string HeaderPath = getCanonicalPath(Header.Entry.getName());
- HeaderFileNames.push_back(HeaderPath);
- }
+ for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal))
+ HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName()));
int MissingCountThisModule = Mod.MissingHeaders.size();
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index b8f1e00d6fac1f..d2eec34993abb6 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -276,6 +276,7 @@ class alignas(8) Module {
SmallVector<Header, 2> HeadersStorage;
public:
+ ArrayRef<Header> getAllHeaders() const { return HeadersStorage; }
ArrayRef<Header> getHeaders(HeaderKind HK) const {
auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
>From 3b55b7c275d8e02362f8ca0d73d6f27799688525 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 24 Oct 2024 09:03:26 -0700
Subject: [PATCH 3/3] Address review feedback
---
clang/include/clang/Basic/Module.h | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index d2eec34993abb6..1ab3b5e5f81567 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -253,8 +253,6 @@ class alignas(8) Module {
HK_PrivateTextual,
HK_Excluded
};
- static const int NumHeaderKinds = HK_Excluded + 1;
-
/// Information about a header directive as found in the module map
/// file.
struct Header {
@@ -263,32 +261,36 @@ class alignas(8) Module {
FileEntryRef Entry;
};
- /// Information about a directory name as found in the module map
- /// file.
- struct DirectoryName {
- std::string NameAsWritten;
- std::string PathRelativeToRootModuleDirectory;
- DirectoryEntryRef Entry;
- };
-
private:
- unsigned HeaderKindBeginIndex[6] = {};
+ static const int NumHeaderKinds = HK_Excluded + 1;
+ // The begin index for a HeaderKind also acts the end index of HeaderKind - 1.
+ // The extra element at the end acts as the end index of the last HeaderKind.
+ unsigned HeaderKindBeginIndex[NumHeaderKinds + 1] = {};
SmallVector<Header, 2> HeadersStorage;
public:
ArrayRef<Header> getAllHeaders() const { return HeadersStorage; }
ArrayRef<Header> getHeaders(HeaderKind HK) const {
+ assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
return {BeginIt, EndIt};
}
void addHeader(HeaderKind HK, Header H) {
+ assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
HeadersStorage.insert(EndIt, std::move(H));
- for (unsigned HKI = HK + 1; HKI != 6; ++HKI)
+ for (unsigned HKI = HK + 1; HKI != NumHeaderKinds + 1; ++HKI)
++HeaderKindBeginIndex[HKI];
}
+ /// Information about a directory name as found in the module map file.
+ struct DirectoryName {
+ std::string NameAsWritten;
+ std::string PathRelativeToRootModuleDirectory;
+ DirectoryEntryRef Entry;
+ };
+
/// Stored information about a header directive that was found in the
/// module map file but has not been resolved to a file.
struct UnresolvedHeaderDirective {
More information about the cfe-commits
mailing list