[clang] [clang][modules] Allow including module maps to be non-affecting (PR #89992)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 24 14:18:16 PDT 2024
https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/89992
None
>From e16becb9357fd560a1c9c673389e6dbba02e024c Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 24 Apr 2024 11:12:28 -0700
Subject: [PATCH] [clang][modules] Allow including module maps to be
non-affecting
---
.../include/clang/Serialization/ASTBitCodes.h | 4 +-
clang/include/clang/Serialization/ASTWriter.h | 9 ++
.../include/clang/Serialization/ModuleFile.h | 1 +
clang/lib/Serialization/ASTReader.cpp | 5 +-
clang/lib/Serialization/ASTWriter.cpp | 109 ++++++++++++------
.../DependencyScanning/ModuleDepCollector.cpp | 2 +-
.../ClangScanDeps/modules-extern-unrelated.m | 17 +--
7 files changed, 97 insertions(+), 50 deletions(-)
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c19677..9e90db513fde44 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
-const unsigned VERSION_MAJOR = 30;
+const unsigned VERSION_MAJOR = 31;
/// AST file minor version number supported by this version of
/// Clang.
@@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 30;
/// for the previous version could still support reading the new
/// version by ignoring new kinds of subblocks), this number
/// should be increased.
-const unsigned VERSION_MINOR = 1;
+const unsigned VERSION_MINOR = 0;
/// An ID number that refers to an identifier in an AST file.
///
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 13b4ad4ad2953d..a9012837c30620 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -76,6 +76,10 @@ class StoredDeclsList;
class SwitchCase;
class Token;
+namespace SrcMgr {
+class FileInfo;
+} // namespace SrcMgr
+
/// Writes an AST file containing the contents of a translation unit.
///
/// The ASTWriter class produces a bitstream containing the serialized
@@ -491,6 +495,11 @@ class ASTWriter : public ASTDeserializationListener,
/// during \c SourceManager serialization.
void computeNonAffectingInputFiles();
+ /// Some affecting files can be included from files that are not considered
+ /// affecting. This function erases such source locations.
+ SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr,
+ const SrcMgr::FileInfo &File);
+
/// Returns an adjusted \c FileID, accounting for any non-affecting input
/// files.
FileID getAdjustedFileID(FileID FID) const;
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 492c35dceb08d4..84fb2e8b1e4842 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -69,6 +69,7 @@ struct InputFileInfo {
bool Overridden;
bool Transient;
bool TopLevel;
+ bool TopLevelAmongAffecting;
bool ModuleMap;
};
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 43b69045bb0543..2a726ee7fb4491 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2444,9 +2444,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
R.TopLevel = static_cast<bool>(Record[5]);
- R.ModuleMap = static_cast<bool>(Record[6]);
+ R.TopLevelAmongAffecting = static_cast<bool>(Record[6]);
+ R.ModuleMap = static_cast<bool>(Record[7]);
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
- uint16_t AsRequestedLength = Record[7];
+ uint16_t AsRequestedLength = Record[8];
std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
std::string Name = Blob.substr(AsRequestedLength).str();
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 30195868ca9965..aa235d9bd8d824 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -173,57 +173,53 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
const HeaderSearch &HS = PP.getHeaderSearchInfo();
const ModuleMap &MM = HS.getModuleMap();
- const SourceManager &SourceMgr = PP.getSourceManager();
std::set<const FileEntry *> ModuleMaps;
- auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) {
- if (!ModuleMaps.insert(F).second)
+ std::set<const Module *> ProcessedModules;
+ auto CollectModuleMapsForHierarchy = [&](const Module *M) {
+ M = M->getTopLevelModule();
+
+ if (!ProcessedModules.insert(M).second)
return;
- SourceLocation Loc = SourceMgr.getIncludeLoc(FID);
- // The include location of inferred module maps can point into the header
- // file that triggered the inferring. Cut off the walk if that's the case.
- while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) {
- FID = SourceMgr.getFileID(Loc);
- F = *SourceMgr.getFileEntryRefForID(FID);
- if (!ModuleMaps.insert(F).second)
- break;
- Loc = SourceMgr.getIncludeLoc(FID);
- }
- };
- std::set<const Module *> ProcessedModules;
- auto CollectIncludingMapsFromAncestors = [&](const Module *M) {
- for (const Module *Mod = M; Mod; Mod = Mod->Parent) {
- if (!ProcessedModules.insert(Mod).second)
- break;
+ std::queue<const Module *> Q;
+ Q.push(M);
+ while (!Q.empty()) {
+ const Module *Mod = Q.front();
+ Q.pop();
+
// The containing module map is affecting, because it's being pointed
// into by Module::DefinitionLoc.
- if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid())
- CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
+ if (auto FE = MM.getContainingModuleMapFile(Mod); FE)
+ ModuleMaps.insert(*FE);
// For inferred modules, the module map that allowed inferring is not in
// the include chain of the virtual containing module map file. It did
// affect the compilation, though.
- if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid())
- CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
+ if (auto FE = MM.getModuleMapFileForUniquing(Mod); FE)
+ ModuleMaps.insert(*FE);
+
+ for (auto *SubM : Mod->submodules())
+ Q.push(SubM);
}
};
// Handle all the affecting modules referenced from the root module.
+ CollectModuleMapsForHierarchy(RootModule);
+
std::queue<const Module *> Q;
Q.push(RootModule);
while (!Q.empty()) {
- const Module *CurrentModule = Q.front();
+ const Module *RootSubmodule = Q.front();
Q.pop();
- CollectIncludingMapsFromAncestors(CurrentModule);
- for (const Module *ImportedModule : CurrentModule->Imports)
- CollectIncludingMapsFromAncestors(ImportedModule);
- for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
- CollectIncludingMapsFromAncestors(UndeclaredModule);
+ for (const Module *ImportedModule : RootSubmodule->Imports)
+ CollectModuleMapsForHierarchy(ImportedModule);
+ for (const Module *UndeclaredModule : RootSubmodule->UndeclaredUses)
+ CollectModuleMapsForHierarchy(UndeclaredModule);
- for (auto *M : CurrentModule->submodules())
- Q.push(M);
+ for (auto *SubM : RootSubmodule->submodules())
+ Q.push(SubM);
}
// Handle textually-included headers that belong to other modules.
@@ -249,9 +245,39 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
for (const auto &KH : HS.findResolvedModulesForHeader(*File))
if (const Module *M = KH.getModule())
- CollectIncludingMapsFromAncestors(M);
+ CollectModuleMapsForHierarchy(M);
}
+ // Note: This algorithm doesn't handle all edge-cases. Let's say something
+ // from \c ModulesToProcess imports X. The algorithm doesn't pick up
+ // Y.modulemap as affecting in the following cases:
+
+ //--- X.modulemap
+ // module X { header "X.h" }
+ // extern module Y "Y.modulemap
+ //--- Y.modulemap
+ // module Y { header "Y.h" }
+ //
+ // Since SourceManager doesn't keep list of files included from X.modulemap,
+ // we would need to walk up the include chain from all leaf files to figure
+ // out if they are reachable from X.modulemap and if so mark all module map
+ // files in the chain as affecting.
+
+ //--- Y.modulemap
+ // module Y { header "Y.h" }
+ // extern module X "X.modulemap"
+ //--- X.modulemap
+ // module X { header "X.h" }
+ // module Y.Sub { header "Y_Sub.h" }
+ //
+ // Only considering X.modulemap affecting means that parsing it without having
+ // access to Y.modulemap fails. There is no way to find out this dependence
+ // since ModuleMap does not keep a mapping from a module map file to the
+ // modules it defines. We could always consider the including module map as
+ // affecting, but that is problematic on MacOS, where X might be something
+ // like Darwin whose parent module map includes lots of other module maps that
+ // describe unrelated top-level modules.
+
return ModuleMaps;
}
@@ -1631,6 +1657,7 @@ struct InputFileEntry {
bool IsTransient;
bool BufferOverridden;
bool IsTopLevel;
+ bool IsTopLevelAmongAffecting;
bool IsModuleMap;
uint32_t ContentHash[2];
@@ -1639,6 +1666,18 @@ struct InputFileEntry {
} // namespace
+SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr,
+ const SrcMgr::FileInfo &File) {
+ SourceLocation IncludeLoc = File.getIncludeLoc();
+ if (IncludeLoc.isValid()) {
+ FileID IncludeFID = SourceMgr.getFileID(IncludeLoc);
+ assert(IncludeFID.isValid() && "IncludeLoc in invalid file");
+ if (!IsSLocAffecting[IncludeFID.ID])
+ IncludeLoc = SourceLocation();
+ }
+ return IncludeLoc;
+}
+
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
HeaderSearchOptions &HSOpts) {
using namespace llvm;
@@ -1654,6 +1693,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level
+ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name
@@ -1693,6 +1733,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
+ Entry.IsTopLevelAmongAffecting =
+ getAffectingIncludeLoc(SourceMgr, File).isInvalid();
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());
auto ContentHash = hash_code(-1);
@@ -1758,6 +1800,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.BufferOverridden,
Entry.IsTransient,
Entry.IsTopLevel,
+ Entry.IsTopLevelAmongAffecting,
Entry.IsModuleMap,
NameAsRequested.size()};
@@ -2219,7 +2262,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
SLocEntryOffsets.push_back(Offset);
// Starting offset of this entry within this module, so skip the dummy.
Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
- AddSourceLocation(File.getIncludeLoc(), Record);
+ AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index f46324ee9989eb..ae8cb664ca14b5 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
*MF, /*IncludeSystem=*/true,
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
- if (!(IFI.TopLevel && IFI.ModuleMap))
+ if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap))
return;
if (StringRef(IFI.FilenameAsRequested)
.ends_with("__inferred_module.map"))
diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m
index 76611c596d3eff..90c494991343e5 100644
--- a/clang/test/ClangScanDeps/modules-extern-unrelated.m
+++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m
@@ -22,15 +22,8 @@
//--- second/second.h
#include "first_other.h"
-//--- cdb.json.template
-[{
- "directory": "DIR",
- "file": "DIR/tu.m",
- "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o"
-}]
-
-// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json
-// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json
+// RUN: clang-scan-deps -format experimental-full -o %t/result.json \
+// RUN: -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
// CHECK: {
@@ -67,11 +60,11 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap",
// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/second/second.h",
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap"
// CHECK-NEXT: ],
@@ -90,11 +83,11 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap",
// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap",
// CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h"
@@ -115,7 +108,7 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "command-line": [
// CHECK: ],
-// CHECK-NEXT: "executable": "clang",
+// CHECK-NEXT: "executable": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/tu.m"
// CHECK-NEXT: ],
More information about the cfe-commits
mailing list