[clang] [clang-tools-extra] [clang][modules] Shrink the size of `Module::Headers` (PR #113395)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 24 09:03:42 PDT 2024


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/113395

>From 09246d11c8663c0b2b31317eddc297c1d29fcd60 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Tue, 22 Oct 2024 16:07:27 -0700
Subject: [PATCH 1/3] [clang][modules] Shrink the size of `Module::Headers`

This patch shrinks the size of the `Module` class from 2112B to 1624B. I wasn't able to get a good data on the actual impact on memory usage, but given my `clang-scan-deps` workload at hand (with tens of thousands of instances), I think there should be some win here. This also speeds up my benchmark by under 0.1%.
---
 clang/include/clang/Basic/Module.h    | 18 ++++++++++++++++--
 clang/lib/Basic/Module.cpp            |  2 +-
 clang/lib/Frontend/FrontendAction.cpp |  2 +-
 clang/lib/Lex/ModuleMap.cpp           | 20 +++++++++++---------
 clang/lib/Serialization/ASTWriter.cpp |  4 ++--
 5 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c5d33fbb562cc..b8f1e00d6fac1f 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -271,8 +271,22 @@ class alignas(8) Module {
     DirectoryEntryRef Entry;
   };
 
-  /// The headers that are part of this module.
-  SmallVector<Header, 2> Headers[5];
+private:
+  unsigned HeaderKindBeginIndex[6] = {};
+  SmallVector<Header, 2> HeadersStorage;
+
+public:
+  ArrayRef<Header> getHeaders(HeaderKind HK) const {
+    auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
+    auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+    return {BeginIt, EndIt};
+  }
+  void addHeader(HeaderKind HK, Header H) {
+    auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+    HeadersStorage.insert(EndIt, std::move(H));
+    for (unsigned HKI = HK + 1; HKI != 6; ++HKI)
+      ++HeaderKindBeginIndex[HKI];
+  }
 
   /// Stored information about a header directive that was found in the
   /// module map file but has not been resolved to a file.
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index ad52fccff5dc7f..a7a3f6b37efef1 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -528,7 +528,7 @@ void Module::print(raw_ostream &OS, unsigned Indent, bool Dump) const {
 
   for (auto &K : Kinds) {
     assert(&K == &Kinds[K.Kind] && "kinds in wrong order");
-    for (auto &H : Headers[K.Kind]) {
+    for (auto &H : getHeaders(K.Kind)) {
       OS.indent(Indent + 2);
       OS << K.Prefix << "header \"";
       OS.write_escaped(H.NameAsWritten);
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 81eea9c4c4dc58..8264bd702fe43f 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes(
 
   // Add includes for each of these headers.
   for (auto HK : {Module::HK_Normal, Module::HK_Private}) {
-    for (Module::Header &H : Module->Headers[HK]) {
+    for (const Module::Header &H : Module->getHeaders(HK)) {
       Module->addTopHeader(H.Entry);
       // Use the path as specified in the module map file. We'll look for this
       // file relative to the module build directory (the directory containing
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index fd6bc17ae9cdac..23875912fa735e 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule,
     // as obtained from the lookup and as obtained from the module.
     // This check is not cheap, so enable it only for debugging.
     bool IsPrivate = false;
-    SmallVectorImpl<Module::Header> *HeaderList[] = {
-        &Header.getModule()->Headers[Module::HK_Private],
-        &Header.getModule()->Headers[Module::HK_PrivateTextual]};
-    for (auto *Hs : HeaderList)
+    ArrayRef<Module::Header> HeaderList[] = {
+        Header.getModule()->getHeaders(Module::HK_Private),
+        Header.getModule()->getHeaders(Module::HK_PrivateTextual)};
+    for (auto Hs : HeaderList)
       IsPrivate |= llvm::any_of(
-          *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
+          Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
     assert(IsPrivate && "inconsistent headers and roles");
   }
 #endif
@@ -1296,27 +1296,29 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
                           ModuleHeaderRole Role, bool Imported) {
   KnownHeader KH(Mod, Role);
 
+  FileEntryRef HeaderEntry = Header.Entry;
+
   // Only add each header to the headers list once.
   // FIXME: Should we diagnose if a header is listed twice in the
   // same module definition?
-  auto &HeaderList = Headers[Header.Entry];
+  auto &HeaderList = Headers[HeaderEntry];
   if (llvm::is_contained(HeaderList, KH))
     return;
 
   HeaderList.push_back(KH);
-  Mod->Headers[headerRoleToKind(Role)].push_back(Header);
+  Mod->addHeader(headerRoleToKind(Role), std::move(Header));
 
   bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts);
   if (!Imported || isCompilingModuleHeader) {
     // When we import HeaderFileInfo, the external source is expected to
     // set the isModuleHeader flag itself.
-    HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
+    HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role,
                                     isCompilingModuleHeader);
   }
 
   // Notify callbacks that we just added a new header.
   for (const auto &Cb : Callbacks)
-    Cb->moduleMapAddHeader(Header.Entry.getName());
+    Cb->moduleMapAddHeader(HeaderEntry.getName());
 }
 
 FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 494890284d2f2c..b576822fa704c8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3070,9 +3070,9 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
         Module::HK_PrivateTextual},
       {SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded}
     };
-    for (auto &HL : HeaderLists) {
+    for (const auto &HL : HeaderLists) {
       RecordData::value_type Record[] = {HL.RecordKind};
-      for (auto &H : Mod->Headers[HL.HeaderKind])
+      for (const auto &H : Mod->getHeaders(HL.HeaderKind))
         Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten);
     }
 

>From 27572e7af40622d4ea48931f89018bffda4334c9 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 24 Oct 2024 08:43:17 -0700
Subject: [PATCH 2/3] Fix clang-tools-extra build

---
 clang-tools-extra/modularize/CoverageChecker.cpp   |  7 +++----
 .../modularize/ModularizeUtilities.cpp             | 14 +++-----------
 clang/include/clang/Basic/Module.h                 |  1 +
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp
index 0e76c539aa3c83..b536ee00497c03 100644
--- a/clang-tools-extra/modularize/CoverageChecker.cpp
+++ b/clang-tools-extra/modularize/CoverageChecker.cpp
@@ -223,10 +223,9 @@ bool CoverageChecker::collectModuleHeaders(const Module &Mod) {
       return false;
   }
 
-  for (auto &HeaderKind : Mod.Headers)
-    for (auto &Header : HeaderKind)
-      ModuleMapHeadersSet.insert(
-          ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
+  for (const auto &Header : Mod.getAllHeaders())
+    ModuleMapHeadersSet.insert(
+        ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
 
   for (auto *Submodule : Mod.submodules())
     collectModuleHeaders(*Submodule);
diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp
index b202b3aae8f8a3..476e13770a94f6 100644
--- a/clang-tools-extra/modularize/ModularizeUtilities.cpp
+++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp
@@ -358,7 +358,7 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
   } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
                  Mod.getUmbrellaDirAsWritten()) {
     // If there normal headers, assume these are umbrellas and skip collection.
-    if (Mod.Headers->size() == 0) {
+    if (Mod.getHeaders(Module::HK_Normal).empty()) {
       // Collect headers in umbrella directory.
       if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(),
                                   UmbrellaDependents))
@@ -371,16 +371,8 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
   // modules or because they are meant to be included by another header,
   // and thus should be ignored by modularize.
 
-  int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
-
-  for (int Index = 0; Index < NormalHeaderCount; ++Index) {
-    DependentsVector NormalDependents;
-    // Collect normal header.
-    const clang::Module::Header &Header(
-      Mod.Headers[clang::Module::HK_Normal][Index]);
-    std::string HeaderPath = getCanonicalPath(Header.Entry.getName());
-    HeaderFileNames.push_back(HeaderPath);
-  }
+  for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal))
+    HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName()));
 
   int MissingCountThisModule = Mod.MissingHeaders.size();
 
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index b8f1e00d6fac1f..d2eec34993abb6 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -276,6 +276,7 @@ class alignas(8) Module {
   SmallVector<Header, 2> HeadersStorage;
 
 public:
+  ArrayRef<Header> getAllHeaders() const { return HeadersStorage; }
   ArrayRef<Header> getHeaders(HeaderKind HK) const {
     auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
     auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];

>From 3b55b7c275d8e02362f8ca0d73d6f27799688525 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 24 Oct 2024 09:03:26 -0700
Subject: [PATCH 3/3] Address review feedback

---
 clang/include/clang/Basic/Module.h | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index d2eec34993abb6..1ab3b5e5f81567 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -253,8 +253,6 @@ class alignas(8) Module {
     HK_PrivateTextual,
     HK_Excluded
   };
-  static const int NumHeaderKinds = HK_Excluded + 1;
-
   /// Information about a header directive as found in the module map
   /// file.
   struct Header {
@@ -263,32 +261,36 @@ class alignas(8) Module {
     FileEntryRef Entry;
   };
 
-  /// Information about a directory name as found in the module map
-  /// file.
-  struct DirectoryName {
-    std::string NameAsWritten;
-    std::string PathRelativeToRootModuleDirectory;
-    DirectoryEntryRef Entry;
-  };
-
 private:
-  unsigned HeaderKindBeginIndex[6] = {};
+  static const int NumHeaderKinds = HK_Excluded + 1;
+  // The begin index for a HeaderKind also acts the end index of HeaderKind - 1.
+  // The extra element at the end acts as the end index of the last HeaderKind.
+  unsigned HeaderKindBeginIndex[NumHeaderKinds + 1] = {};
   SmallVector<Header, 2> HeadersStorage;
 
 public:
   ArrayRef<Header> getAllHeaders() const { return HeadersStorage; }
   ArrayRef<Header> getHeaders(HeaderKind HK) const {
+    assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
     auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
     auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
     return {BeginIt, EndIt};
   }
   void addHeader(HeaderKind HK, Header H) {
+    assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
     auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
     HeadersStorage.insert(EndIt, std::move(H));
-    for (unsigned HKI = HK + 1; HKI != 6; ++HKI)
+    for (unsigned HKI = HK + 1; HKI != NumHeaderKinds + 1; ++HKI)
       ++HeaderKindBeginIndex[HKI];
   }
 
+  /// Information about a directory name as found in the module map file.
+  struct DirectoryName {
+    std::string NameAsWritten;
+    std::string PathRelativeToRootModuleDirectory;
+    DirectoryEntryRef Entry;
+  };
+
   /// Stored information about a header directive that was found in the
   /// module map file but has not been resolved to a file.
   struct UnresolvedHeaderDirective {



More information about the cfe-commits mailing list