[clang] [clang][modules] Allow including module maps to be non-affecting (PR #89992)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 24 19:57:06 PDT 2024
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/89992
>From 6e77e37977bbecc8053d12b4db3f790042b8f34d Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 24 Apr 2024 11:12:28 -0700
Subject: [PATCH] [clang][modules] Allow including module maps to be
non-affecting
---
.../include/clang/Serialization/ASTBitCodes.h | 4 +-
clang/include/clang/Serialization/ASTWriter.h | 9 ++
.../include/clang/Serialization/ModuleFile.h | 1 +
clang/lib/Serialization/ASTReader.cpp | 5 +-
clang/lib/Serialization/ASTWriter.cpp | 93 ++++++++++++-------
.../DependencyScanning/ModuleDepCollector.cpp | 2 +-
.../ClangScanDeps/modules-extern-unrelated.m | 20 ++--
7 files changed, 86 insertions(+), 48 deletions(-)
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c19677..9e90db513fde44 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
-const unsigned VERSION_MAJOR = 30;
+const unsigned VERSION_MAJOR = 31;
/// AST file minor version number supported by this version of
/// Clang.
@@ -51,7 +51,7 @@ const unsigned VERSION_MAJOR = 30;
/// for the previous version could still support reading the new
/// version by ignoring new kinds of subblocks), this number
/// should be increased.
-const unsigned VERSION_MINOR = 1;
+const unsigned VERSION_MINOR = 0;
/// An ID number that refers to an identifier in an AST file.
///
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 13b4ad4ad2953d..76730803e99b92 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -76,6 +76,10 @@ class StoredDeclsList;
class SwitchCase;
class Token;
+namespace SrcMgr {
+class FileInfo;
+} // namespace SrcMgr
+
/// Writes an AST file containing the contents of a translation unit.
///
/// The ASTWriter class produces a bitstream containing the serialized
@@ -491,6 +495,11 @@ class ASTWriter : public ASTDeserializationListener,
/// during \c SourceManager serialization.
void computeNonAffectingInputFiles();
+ /// Some affecting files can be included from files that are not affecting.
+ /// This function erases source locations pointing into such files.
+ SourceLocation getAffectingIncludeLoc(const SourceManager &SourceMgr,
+ const SrcMgr::FileInfo &File);
+
/// Returns an adjusted \c FileID, accounting for any non-affecting input
/// files.
FileID getAdjustedFileID(FileID FID) const;
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 492c35dceb08d4..84fb2e8b1e4842 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -69,6 +69,7 @@ struct InputFileInfo {
bool Overridden;
bool Transient;
bool TopLevel;
+ bool TopLevelAmongAffecting;
bool ModuleMap;
};
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 43b69045bb0543..2a726ee7fb4491 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2444,9 +2444,10 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
R.TopLevel = static_cast<bool>(Record[5]);
- R.ModuleMap = static_cast<bool>(Record[6]);
+ R.TopLevelAmongAffecting = static_cast<bool>(Record[6]);
+ R.ModuleMap = static_cast<bool>(Record[7]);
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
- uint16_t AsRequestedLength = Record[7];
+ uint16_t AsRequestedLength = Record[8];
std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
std::string Name = Blob.substr(AsRequestedLength).str();
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 30195868ca9965..a6f586f0106773 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -173,54 +173,50 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
const HeaderSearch &HS = PP.getHeaderSearchInfo();
const ModuleMap &MM = HS.getModuleMap();
- const SourceManager &SourceMgr = PP.getSourceManager();
std::set<const FileEntry *> ModuleMaps;
- auto CollectIncludingModuleMaps = [&](FileID FID, FileEntryRef F) {
- if (!ModuleMaps.insert(F).second)
+ std::set<const Module *> ProcessedModules;
+ auto CollectModuleMapsForHierarchy = [&](const Module *M) {
+ M = M->getTopLevelModule();
+
+ if (!ProcessedModules.insert(M).second)
return;
- SourceLocation Loc = SourceMgr.getIncludeLoc(FID);
- // The include location of inferred module maps can point into the header
- // file that triggered the inferring. Cut off the walk if that's the case.
- while (Loc.isValid() && isModuleMap(SourceMgr.getFileCharacteristic(Loc))) {
- FID = SourceMgr.getFileID(Loc);
- F = *SourceMgr.getFileEntryRefForID(FID);
- if (!ModuleMaps.insert(F).second)
- break;
- Loc = SourceMgr.getIncludeLoc(FID);
- }
- };
- std::set<const Module *> ProcessedModules;
- auto CollectIncludingMapsFromAncestors = [&](const Module *M) {
- for (const Module *Mod = M; Mod; Mod = Mod->Parent) {
- if (!ProcessedModules.insert(Mod).second)
- break;
+ std::queue<const Module *> Q;
+ Q.push(M);
+ while (!Q.empty()) {
+ const Module *Mod = Q.front();
+ Q.pop();
+
// The containing module map is affecting, because it's being pointed
// into by Module::DefinitionLoc.
- if (FileID FID = MM.getContainingModuleMapFileID(Mod); FID.isValid())
- CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
- // For inferred modules, the module map that allowed inferring is not in
- // the include chain of the virtual containing module map file. It did
- // affect the compilation, though.
- if (FileID FID = MM.getModuleMapFileIDForUniquing(Mod); FID.isValid())
- CollectIncludingModuleMaps(FID, *SourceMgr.getFileEntryRefForID(FID));
+ if (auto FE = MM.getContainingModuleMapFile(Mod))
+ ModuleMaps.insert(*FE);
+ // For inferred modules, the module map that allowed inferring is not
+ // related to the virtual containing module map file. It did affect the
+ // compilation, though.
+ if (auto FE = MM.getModuleMapFileForUniquing(Mod))
+ ModuleMaps.insert(*FE);
+
+ for (auto *SubM : Mod->submodules())
+ Q.push(SubM);
}
};
// Handle all the affecting modules referenced from the root module.
+ CollectModuleMapsForHierarchy(RootModule);
+
std::queue<const Module *> Q;
Q.push(RootModule);
while (!Q.empty()) {
const Module *CurrentModule = Q.front();
Q.pop();
- CollectIncludingMapsFromAncestors(CurrentModule);
for (const Module *ImportedModule : CurrentModule->Imports)
- CollectIncludingMapsFromAncestors(ImportedModule);
+ CollectModuleMapsForHierarchy(ImportedModule);
for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses)
- CollectIncludingMapsFromAncestors(UndeclaredModule);
+ CollectModuleMapsForHierarchy(UndeclaredModule);
for (auto *M : CurrentModule->submodules())
Q.push(M);
@@ -249,9 +245,27 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
for (const auto &KH : HS.findResolvedModulesForHeader(*File))
if (const Module *M = KH.getModule())
- CollectIncludingMapsFromAncestors(M);
+ CollectModuleMapsForHierarchy(M);
}
+ // FIXME: This algorithm is not correct for module map hierarchies where
+ // module map file defining a (sub)module of a top-level module X includes
+ // a module map file that defines a (sub)module of another top-level module Y.
+ // Whenever X is affecting and Y is not, "replaying" this PCM file will fail
+ // when parsing module map files for X due to not knowing about the `extern`
+ // module map for Y.
+ //
+ // We don't have a good way to fix it here. We could mark all children of
+ // affecting module map files as being affecting as well, but that's
+ // expensive. SourceManager does not model the edge from parent to child
+ // SLocEntries, so instead, we would need to iterate over leaf module map
+ // files, walk up their include hierarchy and check whether we arrive at an
+ // affecting module map.
+ //
+ // Instead of complicating and slowing down this function, we should probably
+ // just ban module map hierarchies where module map defining a (sub)module X
+ // includes a module map defining a module that's not a submodule of X.
+
return ModuleMaps;
}
@@ -1631,6 +1645,7 @@ struct InputFileEntry {
bool IsTransient;
bool BufferOverridden;
bool IsTopLevel;
+ bool IsTopLevelAmongAffecting;
bool IsModuleMap;
uint32_t ContentHash[2];
@@ -1639,6 +1654,18 @@ struct InputFileEntry {
} // namespace
+SourceLocation ASTWriter::getAffectingIncludeLoc(const SourceManager &SourceMgr,
+ const SrcMgr::FileInfo &File) {
+ SourceLocation IncludeLoc = File.getIncludeLoc();
+ if (IncludeLoc.isValid()) {
+ FileID IncludeFID = SourceMgr.getFileID(IncludeLoc);
+ assert(IncludeFID.isValid() && "IncludeLoc in invalid file");
+ if (!IsSLocAffecting[IncludeFID.ID])
+ IncludeLoc = SourceLocation();
+ }
+ return IncludeLoc;
+}
+
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
HeaderSearchOptions &HSOpts) {
using namespace llvm;
@@ -1654,6 +1681,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level
+ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level affect
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name
@@ -1693,6 +1721,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
+ Entry.IsTopLevelAmongAffecting =
+ getAffectingIncludeLoc(SourceMgr, File).isInvalid();
Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());
auto ContentHash = hash_code(-1);
@@ -1758,6 +1788,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.BufferOverridden,
Entry.IsTransient,
Entry.IsTopLevel,
+ Entry.IsTopLevelAmongAffecting,
Entry.IsModuleMap,
NameAsRequested.size()};
@@ -2219,7 +2250,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
SLocEntryOffsets.push_back(Offset);
// Starting offset of this entry within this module, so skip the dummy.
Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
- AddSourceLocation(File.getIncludeLoc(), Record);
+ AddSourceLocation(getAffectingIncludeLoc(SourceMgr, File), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index f46324ee9989eb..ae8cb664ca14b5 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -616,7 +616,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
*MF, /*IncludeSystem=*/true,
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
- if (!(IFI.TopLevel && IFI.ModuleMap))
+ if (!(IFI.TopLevelAmongAffecting && IFI.ModuleMap))
return;
if (StringRef(IFI.FilenameAsRequested)
.ends_with("__inferred_module.map"))
diff --git a/clang/test/ClangScanDeps/modules-extern-unrelated.m b/clang/test/ClangScanDeps/modules-extern-unrelated.m
index 76611c596d3eff..23ddbb0a681203 100644
--- a/clang/test/ClangScanDeps/modules-extern-unrelated.m
+++ b/clang/test/ClangScanDeps/modules-extern-unrelated.m
@@ -1,3 +1,6 @@
+// This test checks that parent module maps that do not define any related
+// modules are not affecting.
+
// RUN: rm -rf %t
// RUN: split-file %s %t
@@ -22,15 +25,8 @@
//--- second/second.h
#include "first_other.h"
-//--- cdb.json.template
-[{
- "directory": "DIR",
- "file": "DIR/tu.m",
- "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR/zeroth -I DIR/first -I DIR/second -c DIR/tu.m -o DIR/tu.o"
-}]
-
-// RUN: sed -e "s|DIR|%/t|g" -e "s|INPUTS|%/S/Inputs|g" %t/cdb.json.template > %t/cdb.json
-// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json
+// RUN: clang-scan-deps -format experimental-full -o %t/result.json \
+// RUN: -- %clang -fmodules -fmodules-cache-path=%t/cache -I %t/zeroth -I %t/first -I %t/second -c %t/tu.m -o %t/tu.o
// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
// CHECK: {
@@ -67,11 +63,11 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/second/second.modulemap",
// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/second/second.h",
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap"
// CHECK-NEXT: ],
@@ -90,11 +86,11 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/zeroth/module.modulemap",
// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "-fmodule-map-file=[[PREFIX]]/second/module.modulemap"
// CHECK: ],
// CHECK-NEXT: "context-hash": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/first/module.modulemap",
-// CHECK-NEXT: "[[PREFIX]]/second/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/second/second.modulemap",
// CHECK-NEXT: "[[PREFIX]]/zeroth/module.modulemap",
// CHECK-NEXT: "[[PREFIX]]/zeroth/zeroth.h"
@@ -115,7 +111,7 @@
// CHECK-NEXT: ],
// CHECK-NEXT: "command-line": [
// CHECK: ],
-// CHECK-NEXT: "executable": "clang",
+// CHECK-NEXT: "executable": "{{.*}}",
// CHECK-NEXT: "file-deps": [
// CHECK-NEXT: "[[PREFIX]]/tu.m"
// CHECK-NEXT: ],
More information about the cfe-commits
mailing list