[clang] dcd3a0c - [clang][modules][deps] Create more efficient API for visitation of `ModuleFile` inputs
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 9 10:19:43 PDT 2023
Author: Jan Svoboda
Date: 2023-08-09T10:19:36-07:00
New Revision: dcd3a0c9f13b551ca2bcefa0dd181a383f44df49
URL: https://github.com/llvm/llvm-project/commit/dcd3a0c9f13b551ca2bcefa0dd181a383f44df49
DIFF: https://github.com/llvm/llvm-project/commit/dcd3a0c9f13b551ca2bcefa0dd181a383f44df49.diff
LOG: [clang][modules][deps] Create more efficient API for visitation of `ModuleFile` inputs
The current `ASTReader::visitInputFiles()` function calls into `FileManager` to create `FileEntryRef` objects. This ends up being fairly costly in `clang-scan-deps`, where we mostly only care about file paths.
This patch introduces new `ASTReader` API that gives clients access to just the serialized paths. Since the scanner needs both the as-requested path and the on-disk one (and doesn't want to transform the former into the latter via `FileManager`), this patch starts serializing both of them into the PCM file if they differ.
This increases the size of scanning PCMs by 0.1% and speeds up scanning by 5%.
Reviewed By: benlangmuir, vsapsai
Differential Revision: https://reviews.llvm.org/D157066
Added:
Modified:
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 2ae9e09998c4c1..3ad22bbcf5d12d 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
-const unsigned VERSION_MAJOR = 27;
+const unsigned VERSION_MAJOR = 28;
/// AST file minor version number supported by this version of
/// Clang.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index d56e2117a53f0d..7706de74d3632d 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2373,6 +2373,13 @@ class ASTReader
/// Loads comments ranges.
void ReadComments() override;
+ /// Visit all the input file infos of the given module file.
+ void visitInputFileInfos(
+ serialization::ModuleFile &MF, bool IncludeSystem,
+ llvm::function_ref<void(const serialization::InputFileInfo &IFI,
+ bool IsSystem)>
+ Visitor);
+
/// Visit all the input files of the given module file.
void visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index b632b4e3e7a7ce..8c4067853d6646 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -61,13 +61,15 @@ enum ModuleKind {
/// The input file info that has been loaded from an AST file.
struct InputFileInfo {
+ std::string FilenameAsRequested;
std::string Filename;
uint64_t ContentHash;
off_t StoredSize;
time_t StoredTime;
bool Overridden;
bool Transient;
- bool TopLevelModuleMap;
+ bool TopLevel;
+ bool ModuleMap;
};
/// The input file that has been loaded from this AST file, along with
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index dcb845dd551e8d..f6c643d9113d8a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2342,9 +2342,22 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
R.StoredTime = static_cast<time_t>(Record[2]);
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
- R.TopLevelModuleMap = static_cast<bool>(Record[5]);
- R.Filename = std::string(Blob);
- ResolveImportedPath(F, R.Filename);
+ R.TopLevel = static_cast<bool>(Record[5]);
+ R.ModuleMap = static_cast<bool>(Record[6]);
+ std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
+ uint16_t AsRequestedLength = Record[7];
+
+ std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
+ std::string Name = Blob.substr(AsRequestedLength).str();
+
+ ResolveImportedPath(F, NameAsRequested);
+ ResolveImportedPath(F, Name);
+
+ if (Name.empty())
+ Name = NameAsRequested;
+
+ return std::make_pair(std::move(NameAsRequested), std::move(Name));
+ }();
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) // FIXME this drops errors on the floor.
@@ -2395,7 +2408,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
time_t StoredTime = FI.StoredTime;
bool Overridden = FI.Overridden;
bool Transient = FI.Transient;
- StringRef Filename = FI.Filename;
+ StringRef Filename = FI.FilenameAsRequested;
uint64_t StoredContentHash = FI.ContentHash;
// For standard C++ modules, we don't need to check the inputs.
@@ -2742,9 +2755,9 @@ ASTReader::ReadControlBlock(ModuleFile &F,
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFileInfo FI = getInputFileInfo(F, I + 1);
- Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden,
- F.Kind == MK_ExplicitModule ||
- F.Kind == MK_PrebuiltModule);
+ Listener->visitInputFile(
+ FI.FilenameAsRequested, IsSystem, FI.Overridden,
+ F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule);
}
}
@@ -9307,6 +9320,22 @@ void ASTReader::ReadComments() {
}
}
+void ASTReader::visitInputFileInfos(
+ serialization::ModuleFile &MF, bool IncludeSystem,
+ llvm::function_ref<void(const serialization::InputFileInfo &IFI,
+ bool IsSystem)>
+ Visitor) {
+ unsigned NumUserInputs = MF.NumUserInputFiles;
+ unsigned NumInputs = MF.InputFilesLoaded.size();
+ assert(NumUserInputs <= NumInputs);
+ unsigned N = IncludeSystem ? NumInputs : NumUserInputs;
+ for (unsigned I = 0; I < N; ++I) {
+ bool IsSystem = I >= NumUserInputs;
+ InputFileInfo IFI = getInputFileInfo(MF, I+1);
+ Visitor(IFI, IsSystem);
+ }
+}
+
void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
llvm::function_ref<void(const serialization::InputFile &IF,
@@ -9328,7 +9357,7 @@ void ASTReader::visitTopLevelModuleMaps(
unsigned NumInputs = MF.InputFilesLoaded.size();
for (unsigned I = 0; I < NumInputs; ++I) {
InputFileInfo IFI = getInputFileInfo(MF, I + 1);
- if (IFI.TopLevelModuleMap)
+ if (IFI.TopLevel && IFI.ModuleMap)
if (auto FE = getInputFile(MF, I + 1).getFile())
Visitor(*FE);
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index e238ad3d186fba..8b0015f1ee9f6c 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1525,7 +1525,8 @@ struct InputFileEntry {
bool IsSystemFile;
bool IsTransient;
bool BufferOverridden;
- bool IsTopLevelModuleMap;
+ bool IsTopLevel;
+ bool IsModuleMap;
uint32_t ContentHash[2];
InputFileEntry(FileEntryRef File) : File(File) {}
@@ -1547,8 +1548,10 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
+ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Top-level
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
- IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
+ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Name as req. len
+ IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name as req. + name
unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
// Create input file hash abbreviation.
@@ -1582,8 +1585,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
- Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
- File.getIncludeLoc().isInvalid();
+ Entry.IsTopLevel = File.getIncludeLoc().isInvalid();
+ Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic());
auto ContentHash = hash_code(-1);
if (PP->getHeaderSearchInfo()
@@ -1631,6 +1634,15 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
// Emit size/modification time for this file.
// And whether this file was overridden.
{
+ SmallString<128> NameAsRequested = Entry.File.getNameAsRequested();
+ SmallString<128> Name = Entry.File.getName();
+
+ PreparePathForOutput(NameAsRequested);
+ PreparePathForOutput(Name);
+
+ if (Name == NameAsRequested)
+ Name.clear();
+
RecordData::value_type Record[] = {
INPUT_FILE,
InputFileOffsets.size(),
@@ -1638,9 +1650,12 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
(uint64_t)getTimestampForOutput(Entry.File),
Entry.BufferOverridden,
Entry.IsTransient,
- Entry.IsTopLevelModuleMap};
+ Entry.IsTopLevel,
+ Entry.IsModuleMap,
+ NameAsRequested.size()};
- EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getNameAsRequested());
+ Stream.EmitRecordWithBlob(IFAbbrevCode, Record,
+ (NameAsRequested + Name).str());
}
// Emit content hash for this file.
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index fa75c83ef64017..478374b1e1e1b7 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -459,18 +459,19 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
serialization::ModuleFile *MF =
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
M->getASTFile());
- MDC.ScanInstance.getASTReader()->visitInputFiles(
- *MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
+ MDC.ScanInstance.getASTReader()->visitInputFileInfos(
+ *MF, /*IncludeSystem=*/true,
+ [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
// __inferred_module.map is the result of the way in which an implicit
// module build handles inferred modules. It adds an overlay VFS with
// this file in the proper directory and relies on the rest of Clang to
// handle it like normal. With explicitly built modules we don't need
// to play VFS tricks, so replace it with the correct module map.
- if (IF.getFile()->getName().endswith("__inferred_module.map")) {
+ if (StringRef(IFI.Filename).endswith("__inferred_module.map")) {
MDC.addFileDep(MD, ModuleMap->getName());
return;
}
- MDC.addFileDep(MD, IF.getFile()->getName());
+ MDC.addFileDep(MD, IFI.Filename);
});
llvm::DenseSet<const Module *> SeenDeps;
@@ -478,11 +479,15 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
addAllSubmoduleDeps(M, MD, SeenDeps);
addAllAffectingClangModules(M, MD, SeenDeps);
- MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
- *MF, [&](FileEntryRef FE) {
- if (FE.getNameAsRequested().endswith("__inferred_module.map"))
+ MDC.ScanInstance.getASTReader()->visitInputFileInfos(
+ *MF, /*IncludeSystem=*/true,
+ [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
+ if (!(IFI.TopLevel && IFI.ModuleMap))
return;
- MD.ModuleMapFileDeps.emplace_back(FE.getNameAsRequested());
+ if (StringRef(IFI.FilenameAsRequested)
+ .endswith("__inferred_module.map"))
+ return;
+ MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
});
CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs(
More information about the cfe-commits
mailing list