[clang] [clang][modules] Allow including module maps to be non-affecting (PR #89992)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 24 14:18:16 PDT 2024


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/89992

None

>From e16becb9357fd560a1c9c673389e6dbba02e024c Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 24 Apr 2024 11:12:28 -0700
Subject: [PATCH] [clang][modules] Allow including module maps to be
 non-affecting

---
 .../include/clang/Serialization/ASTBitCodes.h |   4 +-
 clang/include/clang/Serialization/ASTWriter.h |   9 ++
 .../include/clang/Serialization/ModuleFile.h  |   1 +
 clang/lib/Serialization/ASTReader.cpp         |   5 +-
 clang/lib/Serialization/ASTWriter.cpp         | 109 ++++++++++++------
 .../DependencyScanning/ModuleDepCollector.cpp |   2 +-
 .../ClangScanDeps/modules-extern-unrelated.m  |  17 +--
 7 files changed, 97 insertions(+), 50 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c19677..9e90db513fde44 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
 /// Version 4 of AST files also requires that the version control branch and
 /// revision match exactly, since there is no backward compatibility of
 /// AST files at this time.
-const unsigned VERSION_MAJOR = 30;
+const unsigned VERSION_MAJOR = 31;
 
 /// AST file minor version number supported by this version of
 /// Clang.
@@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 30;
 /// for the previous version could still support reading the new
 /// version by ignoring new kinds of subblocks), this number
 /// should be increased.
-const unsigned VERSION_MINOR = 1;
+const unsigned VERSION_MINOR = 0;
 
 /// An ID number that refers to an identifier in an AST file.
 ///
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 13b4ad4ad2953d..a9012837c30620 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -76,6 +76,10 @@ class StoredDeclsList;
 class SwitchCase;
 class Token;
 
+namespace SrcMgr {
+class FileInfo;
+} // namespace SrcMgr
+
 /// Writes an AST file containing the contents of a translation unit.
 ///
 /// The ASTWriter class produces a bitstream containing the serialized
@@ -491,6 +495,11 @@ class ASTWriter : public ASTDeserializationListener,
   /// during \c SourceManager serialization.
   void computeNonAffectingInputFiles();
 
+  /// Some affecting files can be included from files that are not considered
+  /// affecting. This function erases such source locations.
+  SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr,
+                                        const SrcMgr::FileInfo &File);
+
   /// Returns an adjusted \c FileID, accounting for any non-affecting input
   /// files.
   FileID getAdjustedFileID(FileID FID) const;
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 492c35dceb08d4..84fb2e8b1e4842 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -69,6 +69,7 @@ struct InputFileInfo {
   bool Overridden;
   bool Transient;
   bool TopLevel;
+  bool TopLevelAmongAffecting;
   bool ModuleMap;
 };
 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 43b69045bb0543..2a726ee7fb4491 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2444,9 +2444,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
   R.Overridden = static_cast<bool>(Record[3]);
   R.Transient = static_cast<bool>(Record[4]);
   R.TopLevel = static_cast<bool>(Record[5]);
-  R.ModuleMap = static_cast<bool>(Record[6]);
+  R.TopLevelAmongAffecting = static_cast<bool>(Record[6]);
+  R.ModuleMap = static_cast<bool>(Record[7]);
   std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
-    uint16_t AsRequestedLength = Record[7];
+    uint16_t AsRequestedLength = Record[8];
 
     std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
     std::string Name = Blob.substr(AsRequestedLength).str();
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 30195868ca9965..aa235d9bd8d824 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -173,57 +173,53 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
   const HeaderSearch &HS = PP.getHeaderSearchInfo();
   const ModuleMap &MM = HS.getModuleMap();
-  const SourceManager &SourceMgr = PP.getSourceManager();
 
   std::set<const FileEntry *> ModuleMaps;
-  auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) {
-    if (!ModuleMaps.insert(F).second)
+  std::set<const Module *> ProcessedModules;
+  auto CollectModuleMapsForHierarchy = [&](const Module *M) {
+    M = M->getTopLevelModule();
+
+    if (!ProcessedModules.insert(M).second)
       return;
-    SourceLocation Loc = SourceMgr.getIncludeLoc(FID);
-    // The include location of inferred module maps can point into the header
-    // file that triggered the inferring. Cut off the walk if that's the case.
-    while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) {
-      FID = SourceMgr.getFileID(Loc);
-      F = *SourceMgr.getFileEntryRefForID(FID);
-      if (!ModuleMaps.insert(F).second)
-        break;
-      Loc = SourceMgr.getIncludeLoc(FID);
-    }
-  };
 
-  std::set<const Module *> ProcessedModules;
-  auto CollectIncludingMapsFromAncestors = [&](const Module *M) {
-    for (const Module *Mod = M; Mod; Mod = Mod->Parent) {
-      if (!ProcessedModules.insert(Mod).second)
-        break;
+    std::queue<const Module *> Q;
+    Q.push(M);
+    while (!Q.empty()) {
+      const Module *Mod = Q.front();
+      Q.pop();
+
       // The containing module map is affecting, because it's being pointed
       // into by Module::DefinitionLoc.
-      if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid())
-        CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
+      if (auto FE = MM.getContainingModuleMapFile(Mod); FE)
+        ModuleMaps.insert(*FE);
       // For inferred modules, the module map that allowed inferring is not in
       // the include chain of the virtual containing module map file. It did
       // affect the compilation, though.
-      if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid())
-        CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
+      if (auto FE = MM.getModuleMapFileForUniquing(Mod); FE)
+        ModuleMaps.insert(*FE);
+
+      for (auto *SubM : Mod->submodules())
+        Q.push(SubM);
     }
   };
 
   // Handle all the affecting modules referenced from the root module.
 
+  CollectModuleMapsForHierarchy(RootModule);
+
   std::queue<const Module *> Q;
   Q.push(RootModule);
   while (!Q.empty()) {
-    const Module *CurrentModule = Q.front();
+    const Module *RootSubmodule = Q.front();
     Q.pop();
 
-    CollectIncludingMapsFromAncestors(CurrentModule);
-    for (const Module *ImportedModule : CurrentModule->Imports)
-      CollectIncludingMapsFromAncestors(ImportedModule);
-    for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
-      CollectIncludingMapsFromAncestors(UndeclaredModule);
+    for (const Module *ImportedModule : RootSubmodule->Imports)
+      CollectModuleMapsForHierarchy(ImportedModule);
+    for (const Module *UndeclaredModule : RootSubmodule->UndeclaredUses)
+      CollectModuleMapsForHierarchy(UndeclaredModule);
 
-    for (auto *M : CurrentModule->submodules())
-      Q.push(M);
+    for (auto *SubM : RootSubmodule->submodules())
+      Q.push(SubM);
   }
 
   // Handle textually-included headers that belong to other modules.
@@ -249,9 +245,39 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
 
     for (const auto &KH : HS.findResolvedModulesForHeader(*File))
       if (const Module *M = KH.getModule())
-        CollectIncludingMapsFromAncestors(M);
+        CollectModuleMapsForHierarchy(M);
   }
 
+  // Note: This algorithm doesn't handle all edge-cases. Let's say something
+  // from \c ModulesToProcess imports X. The algorithm doesn't pick up
+  // Y.modulemap as affecting in the following cases:
+
+  //--- X.modulemap
+  // module X { header "X.h" }
+  // extern module Y "Y.modulemap
+  //--- Y.modulemap
+  // module Y { header "Y.h" }
+  //
+  // Since SourceManager doesn't keep list of files included from X.modulemap,
+  // we would need to walk up the include chain from all leaf files to figure
+  // out if they are reachable from X.modulemap and if so mark all module map
+  // files in the chain as affecting.
+
+  //--- Y.modulemap
+  // module Y { header "Y.h" }
+  // extern module X "X.modulemap"
+  //--- X.modulemap
+  // module X { header "X.h" }
+  // module Y.Sub { header "Y_Sub.h" }
+  //
+  // Only considering X.modulemap affecting means that parsing it without having
+  // access to Y.modulemap fails. There is no way to find out this dependence
+  // since ModuleMap does not keep a mapping from a module map file to the
+  // modules it defines. We could always consider the including module map as
+  // affecting, but that is problematic on MacOS, where X might be something
+  // like Darwin whose parent module map includes lots of other module maps that
+  // describe unrelated top-level modules.
+
   return ModuleMaps;
 }
 
@@ -1631,6 +1657,7 @@ struct InputFileEntry {
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevel;
+  bool IsTopLevelAmongAffecting;
   bool IsModuleMap;
   uint32_t ContentHash[2];
 
@@ -1639,6 +1666,18 @@ struct InputFileEntry {
 
 } // namespace
 
+SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr,
+                                                 const SrcMgr::FileInfo &File) {
+  SourceLocation IncludeLoc = File.getIncludeLoc();
+  if (IncludeLoc.isValid()) {
+    FileID IncludeFID = SourceMgr.getFileID(IncludeLoc);
+    assert(IncludeFID.isValid() && "IncludeLoc in invalid file");
+    if (!IsSLocAffecting[IncludeFID.ID])
+      IncludeLoc = SourceLocation();
+  }
+  return IncludeLoc;
+}
+
 void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
                                 HeaderSearchOptions &HSOpts) {
   using namespace llvm;
@@ -1654,6 +1693,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level
+  IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name
@@ -1693,6 +1733,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
     Entry.IsTransient = Cache->IsTransient;
     Entry.BufferOverridden = Cache->BufferOverridden;
     Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
+    Entry.IsTopLevelAmongAffecting =
+        getAffectingIncludeLoc(SourceMgr, File).isInvalid();
     Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());
 
     auto ContentHash = hash_code(-1);
@@ -1758,6 +1800,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
           Entry.BufferOverridden,
           Entry.IsTransient,
           Entry.IsTopLevel,
+          Entry.IsTopLevelAmongAffecting,
           Entry.IsModuleMap,
           NameAsRequested.size()};
 
@@ -2219,7 +2262,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
       SLocEntryOffsets.push_back(Offset);
       // Starting offset of this entry within this module, so skip the dummy.
       Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
-      AddSourceLocation(File.getIncludeLoc(), Record);
+      AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record);
       Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
       Record.push_back(File.hasLineDirectives());
 
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index f46324ee9989eb..ae8cb664ca14b5 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
       *MF, /*IncludeSystem=*/true,
       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
-        if (!(IFI.TopLevel && IFI.ModuleMap))
+        if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap))
           return;
         if (StringRef(IFI.FilenameAsRequested)
                 .ends_with("__inferred_module.map"))
diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m
index 76611c596d3eff..90c494991343e5 100644
--- a/clang/test/ClangScanDeps/modules-extern-unrelated.m
+++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m
@@ -22,15 +22,8 @@
 //--- second/second.h
 #include "first_other.h"
 
-//--- cdb.json.template
-[{
-  "directory": "DIR",
-  "file": "DIR/tu.m",
-  "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o"
-}]
-
-// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json
-// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json
+// RUN: clang-scan-deps -format experimental-full -o %t/result.json \
+// RUN:   -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o
 // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
 
 // CHECK:      {
@@ -67,11 +60,11 @@
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap",
 // CHECK-NEXT:       "command-line": [
+// CHECK-NOT:          "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
 // CHECK:            ],
 // CHECK-NEXT:       "context-hash": "{{.*}}",
 // CHECK-NEXT:       "file-deps": [
 // CHECK-NEXT:         "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT:         "[[PREFIX]]/second/module.modulemap",
 // CHECK-NEXT:         "[[PREFIX]]/second/second.h",
 // CHECK-NEXT:         "[[PREFIX]]/second/second.modulemap"
 // CHECK-NEXT:       ],
@@ -90,11 +83,11 @@
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap",
 // CHECK-NEXT:       "command-line": [
+// CHECK-NOT:          "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
 // CHECK:            ],
 // CHECK-NEXT:       "context-hash": "{{.*}}",
 // CHECK-NEXT:       "file-deps": [
 // CHECK-NEXT:         "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT:         "[[PREFIX]]/second/module.modulemap",
 // CHECK-NEXT:         "[[PREFIX]]/second/second.modulemap",
 // CHECK-NEXT:         "[[PREFIX]]/zeroth/module.modulemap",
 // CHECK-NEXT:         "[[PREFIX]]/zeroth/zeroth.h"
@@ -115,7 +108,7 @@
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "command-line": [
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
+// CHECK-NEXT:           "executable": "{{.*}}",
 // CHECK-NEXT:           "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.m"
 // CHECK-NEXT:           ],



More information about the cfe-commits mailing list