[clang] [ASTWriter] Do not allocate source location space for module maps used only for textual headers (PR #116374)

Ilya Biryukov via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 29 08:12:05 PST 2024


https://github.com/ilya-biryukov updated https://github.com/llvm/llvm-project/pull/116374

>From ff328f256824e30b2c3c8db184a0fcafaef32637 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 15 Nov 2024 11:18:21 +0100
Subject: [PATCH 1/5] [ASTWriter] Do not allocate source location space for
 module maps used only for textual headers

This is a follow up to #112015 and it reduces the duplication of source
locations further.

We do not need to allocate source location space in the serialized PCMs
for module maps used only to find textual headers. Those module maps are
never referenced from anywhere in the serialized ASTs and are re-read in
other compilations.

We do need the InputFile entry for the in the serialized AST because
clang-scan-deps relies on it. The previous patch introduced a mechanism
to do exactly that.
---
 clang/lib/Serialization/ASTWriter.cpp         |  46 +++++---
 ...-affecting-module-map-repeated-textual.cpp | 104 ++++++++++++++++++
 2 files changed, 136 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/Modules/prune-non-affecting-module-map-repeated-textual.cpp

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 88b3e649a5d466..a0d0e03178296d 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -184,14 +184,29 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
   const SourceManager &SM = PP.getSourceManager();
   const ModuleMap &MM = HS.getModuleMap();
 
-  llvm::DenseSet<FileID> ModuleMaps;
-
-  llvm::DenseSet<const Module *> ProcessedModules;
-  auto CollectModuleMapsForHierarchy = [&](const Module *M) {
+  // Module maps used only by textual headers are special. Their FileID is
+  // non-affecting, but their FileEntry is (i.e. must be written as InputFile).
+  enum AffectedReason : bool {
+    ARTextualHeader = 0,
+    ARImportOrTextualHeader = 1,
+  };
+  auto AssignMostImportant = [](AffectedReason &L, AffectedReason R) {
+    L = std::max(L, R);
+  };
+  llvm::DenseMap<FileID, AffectedReason> ModuleMaps;
+  llvm::DenseMap<const Module *, AffectedReason> ProcessedModules;
+  auto CollectModuleMapsForHierarchy = [&](const Module *M, AffectedReason Reason) {
     M = M->getTopLevelModule();
 
-    if (!ProcessedModules.insert(M).second)
+    // We need to process the header either when it was not present of when we
+    // previously flagged module map as textual headers and now we found a
+    // proper import.
+    if (auto [It, Inserted] = ProcessedModules.insert({M, Reason});
+        !Inserted && Reason <= It->second) {
       return;
+    } else {
+      It->second = Reason;
+    }
 
     std::queue<const Module *> Q;
     Q.push(M);
@@ -202,12 +217,12 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
       // The containing module map is affecting, because it's being pointed
       // into by Module::DefinitionLoc.
       if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid())
-        ModuleMaps.insert(F);
+        AssignMostImportant(ModuleMaps[F], Reason);
       // For inferred modules, the module map that allowed inferring is not
       // related to the virtual containing module map file. It did affect the
       // compilation, though.
       if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid())
-        ModuleMaps.insert(UniqF);
+        AssignMostImportant(ModuleMaps[UniqF], Reason);
 
       for (auto *SubM : Mod->submodules())
         Q.push(SubM);
@@ -216,7 +231,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
   // Handle all the affecting modules referenced from the root module.
 
-  CollectModuleMapsForHierarchy(RootModule);
+  CollectModuleMapsForHierarchy(RootModule, ARImportOrTextualHeader);
 
   std::queue<const Module *> Q;
   Q.push(RootModule);
@@ -225,9 +240,9 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
     Q.pop();
 
     for (const Module *ImportedModule : CurrentModule->Imports)
-      CollectModuleMapsForHierarchy(ImportedModule);
+      CollectModuleMapsForHierarchy(ImportedModule, ARImportOrTextualHeader);
     for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
-      CollectModuleMapsForHierarchy(UndeclaredModule);
+      CollectModuleMapsForHierarchy(UndeclaredModule, ARImportOrTextualHeader);
 
     for (auto *M : CurrentModule->submodules())
       Q.push(M);
@@ -256,7 +271,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
     for (const auto &KH : HS.findResolvedModulesForHeader(*File))
       if (const Module *M = KH.getModule())
-        CollectModuleMapsForHierarchy(M);
+        CollectModuleMapsForHierarchy(M, ARTextualHeader);
   }
 
   // FIXME: This algorithm is not correct for module map hierarchies where
@@ -278,13 +293,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
   // includes a module map defining a module that's not a submodule of X.
 
   llvm::DenseSet<const FileEntry *> ModuleFileEntries;
-  for (FileID MM : ModuleMaps) {
-    if (auto *FE = SM.getFileEntryForID(MM))
+  llvm::DenseSet<FileID> ModuleFileIDs;
+  for (auto [FID, Reason] : ModuleMaps) {
+    if (Reason == ARImportOrTextualHeader)
+      ModuleFileIDs.insert(FID);
+    if (auto *FE = SM.getFileEntryForID(FID))
       ModuleFileEntries.insert(FE);
   }
 
   AffectingModuleMaps R;
-  R.DefinitionFileIDs = std::move(ModuleMaps);
+  R.DefinitionFileIDs = std::move(ModuleFileIDs);
   R.DefinitionFiles = std::move(ModuleFileEntries);
   return std::move(R);
 }
diff --git a/clang/test/Modules/prune-non-affecting-module-map-repeated-textual.cpp b/clang/test/Modules/prune-non-affecting-module-map-repeated-textual.cpp
new file mode 100644
index 00000000000000..30914056c55fa2
--- /dev/null
+++ b/clang/test/Modules/prune-non-affecting-module-map-repeated-textual.cpp
@@ -0,0 +1,104 @@
+// Same as prune-non-affecting-module-map-repeated.cpp, but check that textual-only
+// inclusions do not cause duplication of the module map files they are defined in.
+
+// RUN: rm -rf %t && mkdir %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
+// RUN:   -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mixed.map\
+// RUN:   -fmodule-map-file=%t/mod1.map \
+// RUN:   -fmodule-name=mod1 -emit-module %t/mod1.map -o %t/mod1.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
+// RUN:   -fmodule-map-file=%t/mixed.map\
+// RUN:   -fmodule-name=mixed -emit-module %t/mixed.map -o %t/mixed.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
+// RUN:   -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod1.map -fmodule-map-file=%t/mod2.map \
+// RUN:   -fmodule-file=%t/mod1.pcm -fmodule-file=%t/mixed.pcm \
+// RUN:   -fmodule-name=mod2 -emit-module %t/mod2.map -o %t/mod2.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
+// RUN:   -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod2.map -fmodule-map-file=%t/mod3.map \
+// RUN:   -fmodule-file=%t/mod2.pcm -fmodule-file=%t/mixed.pcm \
+// RUN:   -fmodule-name=mod3 -emit-module %t/mod3.map -o %t/mod3.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \
+// RUN:   -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod3.map -fmodule-map-file=%t/mod4.map \
+// RUN:   -fmodule-file=%t/mod3.pcm \
+// RUN:   -fmodule-name=mod4 -emit-module %t/mod4.map -o %t/mod4.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod4.map -fmodule-file=%t/mod4.pcm -fsyntax-only -verify %t/check_slocs.cc
+
+//--- base.map
+module base { textual header "vector.h" }
+//--- mixed.map
+module mixed { textual header "mixed_text.h" header "mixed_mod.h"}
+//--- mod1.map
+module mod1 { header "mod1.h" }
+//--- mod2.map
+module mod2 { header "mod2.h" }
+//--- mod3.map
+module mod3 { header "mod3.h" }
+//--- mod4.map
+module mod4 { header "mod4.h" }
+//--- check_slocs.cc
+#include "mod4.h"
+#include "vector.h"
+#pragma clang __debug sloc_usage // expected-remark {{source manager location address space usage}}
+// expected-note@* {{% of available space}}
+
+// base.map must only be present once, despite being used in each module.
+// Because its location in every module compile should be non-affecting.
+
+// expected-note at base.map:1 {{file entered 1 time}}
+
+// different modules use either only textual header from mixed.map or both textual and modular
+// headers. Either combination must lead to only 1 use at the end, because the module is ultimately
+// in the import chain and any textual uses should not change that.
+
+// expected-note at mixed.map:1 {{file entered 1 time}}
+
+// expected-note@* + {{file entered}}
+
+
+//--- vector.h
+#ifndef VECTOR_H
+#define VECTOR_H
+#endif
+
+//--- mixed_text.h
+#ifndef MIXED_TEXT_H
+#define MIXED_TEXT_H
+#endif
+//--- mixed_mod.h
+#ifndef MIXED_MOD_H
+#define MIXED_MOD_H
+#endif
+
+//--- mod1.h
+#ifndef MOD1
+#define MOD1
+#include "vector.h"
+#include "mixed_text.h"
+int mod1();
+#endif
+//--- mod2.h
+#ifndef MOD2
+#define MOD2
+#include "vector.h"
+#include "mod1.h"
+#include "mixed_mod.h"
+int mod2();
+#endif
+//--- mod3.h
+#ifndef MOD3
+#define MOD3
+#include "vector.h"
+#include "mod2.h"
+#include "mixed_text.h"
+#include "mixed_mod.h"
+int mod3();
+#endif
+//--- mod4.h
+#ifndef MOD4
+#define MOD4
+#include "vector.h"
+#include "mod3.h"
+int mod4();
+#endif

>From e0a183504025dcc8a6ee81b09ad9e547b68e7d1d Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 15 Nov 2024 13:43:54 +0100
Subject: [PATCH 2/5] fixup! [ASTWriter] Do not allocate source location space
 for module maps used only for textual headers

Format the code
---
 clang/lib/Serialization/ASTWriter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a0d0e03178296d..9b81d7c53fc170 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -195,7 +195,8 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
   };
   llvm::DenseMap<FileID, AffectedReason> ModuleMaps;
   llvm::DenseMap<const Module *, AffectedReason> ProcessedModules;
-  auto CollectModuleMapsForHierarchy = [&](const Module *M, AffectedReason Reason) {
+  auto CollectModuleMapsForHierarchy = [&](const Module *M,
+                                           AffectedReason Reason) {
     M = M->getTopLevelModule();
 
     // We need to process the header either when it was not present of when we

>From 5611185ec4f0976963b8fc052334ba5607e0cf84 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 29 Nov 2024 17:04:11 +0100
Subject: [PATCH 3/5] fixup! [ASTWriter] Do not allocate source location space
 for module maps used only for textual headers

fix a typo in the comment
---
 clang/lib/Serialization/ASTWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 9b81d7c53fc170..44b28cf988c560 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -199,7 +199,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
                                            AffectedReason Reason) {
     M = M->getTopLevelModule();
 
-    // We need to process the header either when it was not present of when we
+    // We need to process the header either when it was not present or when we
     // previously flagged module map as textual headers and now we found a
     // proper import.
     if (auto [It, Inserted] = ProcessedModules.insert({M, Reason});

>From 9cad08812c68b3790963d346c509adc9a801046f Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 29 Nov 2024 17:05:29 +0100
Subject: [PATCH 4/5] fixup! [ASTWriter] Do not allocate source location space
 for module maps used only for textual headers

Add _ to the enumerator names
---
 clang/lib/Serialization/ASTWriter.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 44b28cf988c560..edc22621058cd4 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -187,8 +187,8 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
   // Module maps used only by textual headers are special. Their FileID is
   // non-affecting, but their FileEntry is (i.e. must be written as InputFile).
   enum AffectedReason : bool {
-    ARTextualHeader = 0,
-    ARImportOrTextualHeader = 1,
+    AR_TextualHeader = 0,
+    AR_ImportOrTextualHeader = 1,
   };
   auto AssignMostImportant = [](AffectedReason &L, AffectedReason R) {
     L = std::max(L, R);
@@ -232,7 +232,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
   // Handle all the affecting modules referenced from the root module.
 
-  CollectModuleMapsForHierarchy(RootModule, ARImportOrTextualHeader);
+  CollectModuleMapsForHierarchy(RootModule, AR_ImportOrTextualHeader);
 
   std::queue<const Module *> Q;
   Q.push(RootModule);
@@ -241,9 +241,9 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
     Q.pop();
 
     for (const Module *ImportedModule : CurrentModule->Imports)
-      CollectModuleMapsForHierarchy(ImportedModule, ARImportOrTextualHeader);
+      CollectModuleMapsForHierarchy(ImportedModule, AR_ImportOrTextualHeader);
     for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
-      CollectModuleMapsForHierarchy(UndeclaredModule, ARImportOrTextualHeader);
+      CollectModuleMapsForHierarchy(UndeclaredModule, AR_ImportOrTextualHeader);
 
     for (auto *M : CurrentModule->submodules())
       Q.push(M);
@@ -272,7 +272,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
     for (const auto &KH : HS.findResolvedModulesForHeader(*File))
       if (const Module *M = KH.getModule())
-        CollectModuleMapsForHierarchy(M, ARTextualHeader);
+        CollectModuleMapsForHierarchy(M, AR_TextualHeader);
   }
 
   // FIXME: This algorithm is not correct for module map hierarchies where
@@ -296,7 +296,7 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
   llvm::DenseSet<const FileEntry *> ModuleFileEntries;
   llvm::DenseSet<FileID> ModuleFileIDs;
   for (auto [FID, Reason] : ModuleMaps) {
-    if (Reason == ARImportOrTextualHeader)
+    if (Reason == AR_ImportOrTextualHeader)
       ModuleFileIDs.insert(FID);
     if (auto *FE = SM.getFileEntryForID(FID))
       ModuleFileEntries.insert(FE);

>From 441efe6ea1638e874f1d2cb2a8564207b0dc1134 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 29 Nov 2024 17:07:11 +0100
Subject: [PATCH 5/5] fixup! [ASTWriter] Do not allocate source location space
 for module maps used only for textual headers

Change to LHS and RHS
---
 clang/lib/Serialization/ASTWriter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index edc22621058cd4..5be0f0a0a8e840 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -190,8 +190,8 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
     AR_TextualHeader = 0,
     AR_ImportOrTextualHeader = 1,
   };
-  auto AssignMostImportant = [](AffectedReason &L, AffectedReason R) {
-    L = std::max(L, R);
+  auto AssignMostImportant = [](AffectedReason &LHS, AffectedReason RHS) {
+    LHS = std::max(LHS, RHS);
   };
   llvm::DenseMap<FileID, AffectedReason> ModuleMaps;
   llvm::DenseMap<const Module *, AffectedReason> ProcessedModules;



More information about the cfe-commits mailing list