[clang] [clang][serialization] Blobify IMPORTS strings and signatures (PR #116095)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 13 11:21:53 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules
Author: Jan Svoboda (jansvoboda11)
<details>
<summary>Changes</summary>
This PR changes a part of the PCM format to store string-like things in the blob attached to a record instead of VBR6-encoding them into the record itself. Applied to the `IMPORTS` section (which is very hot), this speeds up dependency scanning by 2.8%.
---
Patch is 23.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116095.diff
6 Files Affected:
- (modified) clang/include/clang/Serialization/ASTBitCodes.h (+3-4)
- (modified) clang/include/clang/Serialization/ASTReader.h (+4-10)
- (modified) clang/include/clang/Serialization/ASTWriter.h (+4)
- (modified) clang/lib/Serialization/ASTReader.cpp (+120-104)
- (modified) clang/lib/Serialization/ASTWriter.cpp (+44-12)
- (modified) clang/lib/Serialization/GlobalModuleIndex.cpp (+52-56)
``````````diff
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 8725d5455ec735..8746f482af8f3d 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -44,7 +44,7 @@ namespace serialization {
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
-const unsigned VERSION_MAJOR = 33;
+const unsigned VERSION_MAJOR = 34;
/// AST file minor version number supported by this version of
/// Clang.
@@ -350,9 +350,8 @@ enum ControlRecordTypes {
/// and information about the compiler used to build this AST file.
METADATA = 1,
- /// Record code for the list of other AST files imported by
- /// this AST file.
- IMPORTS,
+ /// Record code for other AST file imported by this AST file.
+ IMPORT,
/// Record code for the original file that was used to
/// generate the AST file, including both its file ID and its
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 9c274adc59a207..f739fe688c110d 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2389,11 +2389,8 @@ class ASTReader
// Read a string
static std::string ReadString(const RecordDataImpl &Record, unsigned &Idx);
-
- // Skip a string
- static void SkipString(const RecordData &Record, unsigned &Idx) {
- Idx += Record[Idx] + 1;
- }
+ static StringRef ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
+ StringRef &Blob);
// Read a path
std::string ReadPath(ModuleFile &F, const RecordData &Record, unsigned &Idx);
@@ -2401,11 +2398,8 @@ class ASTReader
// Read a path
std::string ReadPath(StringRef BaseDirectory, const RecordData &Record,
unsigned &Idx);
-
- // Skip a path
- static void SkipPath(const RecordData &Record, unsigned &Idx) {
- SkipString(Record, Idx);
- }
+ std::string ReadPathBlob(StringRef BaseDirectory, const RecordData &Record,
+ unsigned &Idx, StringRef &Blob);
/// Read a version tuple.
static VersionTuple ReadVersionTuple(const RecordData &Record, unsigned &Idx);
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 161b2ef7c86a49..e418fdea44a0a9 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -769,6 +769,8 @@ class ASTWriter : public ASTDeserializationListener,
/// Add a string to the given record.
void AddString(StringRef Str, RecordDataImpl &Record);
+ void AddStringBlob(StringRef Str, RecordDataImpl &Record,
+ SmallVectorImpl<char> &Blob);
/// Convert a path from this build process into one that is appropriate
/// for emission in the module file.
@@ -776,6 +778,8 @@ class ASTWriter : public ASTDeserializationListener,
/// Add a path to the given record.
void AddPath(StringRef Path, RecordDataImpl &Record);
+ void AddPathBlob(StringRef Str, RecordDataImpl &Record,
+ SmallVectorImpl<char> &Blob);
/// Emit the current record with the given path as a blob.
void EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 8b928ede395ae5..ec85fad3389a1c 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3092,98 +3092,97 @@ ASTReader::ReadControlBlock(ModuleFile &F,
break;
}
- case IMPORTS: {
+ case IMPORT: {
// Validate the AST before processing any imports (otherwise, untangling
// them can be error-prone and expensive). A module will have a name and
// will already have been validated, but this catches the PCH case.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
- // Load each of the imported PCH files.
- unsigned Idx = 0, N = Record.size();
- while (Idx < N) {
- // Read information about the AST file.
- ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
- // Whether we're importing a standard c++ module.
- bool IsImportingStdCXXModule = Record[Idx++];
- // The import location will be the local one for now; we will adjust
- // all import locations of module imports after the global source
- // location info are setup, in ReadAST.
- auto [ImportLoc, ImportModuleFileIndex] =
- ReadUntranslatedSourceLocation(Record[Idx++]);
- // The import location must belong to the current module file itself.
- assert(ImportModuleFileIndex == 0);
- off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0;
- time_t StoredModTime =
- !IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0;
-
- ASTFileSignature StoredSignature;
- if (!IsImportingStdCXXModule) {
- auto FirstSignatureByte = Record.begin() + Idx;
- StoredSignature = ASTFileSignature::create(
- FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
- Idx += ASTFileSignature::size;
- }
+ unsigned Idx = 0;
+ // Read information about the AST file.
+ ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
+
+ // The import location will be the local one for now; we will adjust
+ // all import locations of module imports after the global source
+ // location info are setup, in ReadAST.
+ auto [ImportLoc, ImportModuleFileIndex] =
+ ReadUntranslatedSourceLocation(Record[Idx++]);
+ // The import location must belong to the current module file itself.
+ assert(ImportModuleFileIndex == 0);
+
+ StringRef ImportedName = ReadStringBlob(Record, Idx, Blob);
+
+ bool IsImportingStdCXXModule = Record[Idx++];
+
+ off_t StoredSize = 0;
+ time_t StoredModTime = 0;
+ ASTFileSignature StoredSignature;
+ std::string ImportedFile;
+
+ // For prebuilt and explicit modules first consult the file map for
+ // an override. Note that here we don't search prebuilt module
+ // directories if we're not importing standard c++ module, only the
+ // explicit name to file mappings. Also, we will still verify the
+ // size/signature making sure it is essentially the same file but
+ // perhaps in a different location.
+ if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
+ ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
+ ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);
+
+ if (IsImportingStdCXXModule && ImportedFile.empty()) {
+ Diag(diag::err_failed_to_find_module_file) << ImportedName;
+ return Missing;
+ }
- std::string ImportedName = ReadString(Record, Idx);
- std::string ImportedFile;
-
- // For prebuilt and explicit modules first consult the file map for
- // an override. Note that here we don't search prebuilt module
- // directories if we're not importing standard c++ module, only the
- // explicit name to file mappings. Also, we will still verify the
- // size/signature making sure it is essentially the same file but
- // perhaps in a different location.
- if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
- ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
- ImportedName, /*FileMapOnly*/ !IsImportingStdCXXModule);
-
- // For C++20 Modules, we won't record the path to the imported modules
- // in the BMI
- if (!IsImportingStdCXXModule) {
- if (ImportedFile.empty()) {
- // Use BaseDirectoryAsWritten to ensure we use the same path in the
- // ModuleCache as when writing.
- ImportedFile = ReadPath(BaseDirectoryAsWritten, Record, Idx);
- } else
- SkipPath(Record, Idx);
- } else if (ImportedFile.empty()) {
- Diag(clang::diag::err_failed_to_find_module_file) << ImportedName;
- return Missing;
- }
+ if (!IsImportingStdCXXModule) {
+ StoredSize = (off_t)Record[Idx++];
+ StoredModTime = (time_t)Record[Idx++];
- // If our client can't cope with us being out of date, we can't cope with
- // our dependency being missing.
- unsigned Capabilities = ClientLoadCapabilities;
- if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
- Capabilities &= ~ARR_Missing;
-
- // Load the AST file.
- auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
- Loaded, StoredSize, StoredModTime,
- StoredSignature, Capabilities);
-
- // If we diagnosed a problem, produce a backtrace.
- bool recompilingFinalized =
- Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
- getModuleManager().getModuleCache().isPCMFinal(F.FileName);
- if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
- Diag(diag::note_module_file_imported_by)
- << F.FileName << !F.ModuleName.empty() << F.ModuleName;
- if (recompilingFinalized)
- Diag(diag::note_module_file_conflict);
-
- switch (Result) {
- case Failure: return Failure;
- // If we have to ignore the dependency, we'll have to ignore this too.
- case Missing:
- case OutOfDate: return OutOfDate;
- case VersionMismatch: return VersionMismatch;
- case ConfigurationMismatch: return ConfigurationMismatch;
- case HadErrors: return HadErrors;
- case Success: break;
+ StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size);
+ StoredSignature = ASTFileSignature::create(SignatureBytes.begin(),
+ SignatureBytes.end());
+ Blob = Blob.substr(ASTFileSignature::size);
+
+ if (ImportedFile.empty()) {
+ // Use BaseDirectoryAsWritten to ensure we use the same path in the
+ // ModuleCache as when writing.
+ ImportedFile =
+ ReadPathBlob(BaseDirectoryAsWritten, Record, Idx, Blob);
}
}
+
+ // If our client can't cope with us being out of date, we can't cope with
+ // our dependency being missing.
+ unsigned Capabilities = ClientLoadCapabilities;
+ if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+ Capabilities &= ~ARR_Missing;
+
+ // Load the AST file.
+ auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
+ Loaded, StoredSize, StoredModTime,
+ StoredSignature, Capabilities);
+
+ // If we diagnosed a problem, produce a backtrace.
+ bool recompilingFinalized =
+ Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
+ getModuleManager().getModuleCache().isPCMFinal(F.FileName);
+ if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
+ Diag(diag::note_module_file_imported_by)
+ << F.FileName << !F.ModuleName.empty() << F.ModuleName;
+ if (recompilingFinalized)
+ Diag(diag::note_module_file_conflict);
+
+ switch (Result) {
+ case Failure: return Failure;
+ // If we have to ignore the dependency, we'll have to ignore this too.
+ case Missing:
+ case OutOfDate: return OutOfDate;
+ case VersionMismatch: return VersionMismatch;
+ case ConfigurationMismatch: return ConfigurationMismatch;
+ case HadErrors: return HadErrors;
+ case Success: break;
+ }
break;
}
@@ -5624,36 +5623,38 @@ bool ASTReader::readASTFileControlBlock(
break;
}
- case IMPORTS: {
+ case IMPORT: {
if (!NeedsImports)
break;
- unsigned Idx = 0, N = Record.size();
- while (Idx < N) {
- // Read information about the AST file.
+ unsigned Idx = 0;
+ // Read information about the AST file.
+
+ // Skip Kind
+ Idx++;
- // Skip Kind
- Idx++;
- bool IsStandardCXXModule = Record[Idx++];
+ // Skip ImportLoc
+ Idx++;
- // Skip ImportLoc
- Idx++;
+ StringRef ModuleName = ReadStringBlob(Record, Idx, Blob);
- // In C++20 Modules, we don't record the path to imported
- // modules in the BMI files.
- if (IsStandardCXXModule) {
- std::string ModuleName = ReadString(Record, Idx);
- Listener.visitImport(ModuleName, /*Filename=*/"");
- continue;
- }
+ bool IsStandardCXXModule = Record[Idx++];
- // Skip Size, ModTime and Signature
- Idx += 1 + 1 + ASTFileSignature::size;
- std::string ModuleName = ReadString(Record, Idx);
- std::string FilenameStr = ReadString(Record, Idx);
- auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
- Listener.visitImport(ModuleName, *Filename);
+ // In C++20 Modules, we don't record the path to imported
+ // modules in the BMI files.
+ if (IsStandardCXXModule) {
+ Listener.visitImport(ModuleName, /*Filename=*/"");
+ continue;
}
+
+ // Skip Size and ModTime.
+ Idx += 1 + 1;
+ // Skip signature.
+ Blob = Blob.substr(ASTFileSignature::size);
+
+ StringRef FilenameStr = ReadStringBlob(Record, Idx, Blob);
+ auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
+ Listener.visitImport(ModuleName, *Filename);
break;
}
@@ -9602,6 +9603,14 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
return Result;
}
+StringRef ASTReader::ReadStringBlob(const RecordDataImpl &Record, unsigned &Idx,
+ StringRef &Blob) {
+ unsigned Len = Record[Idx++];
+ StringRef Result = Blob.substr(0, Len);
+ Blob = Blob.substr(Len);
+ return Result;
+}
+
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
return ReadPath(F.BaseDirectory, Record, Idx);
@@ -9613,6 +9622,13 @@ std::string ASTReader::ReadPath(StringRef BaseDirectory,
return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
}
+std::string ASTReader::ReadPathBlob(StringRef BaseDirectory,
+ const RecordData &Record, unsigned &Idx,
+ StringRef &Blob) {
+ StringRef Filename = ReadStringBlob(Record, Idx, Blob);
+ return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
+}
+
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
unsigned &Idx) {
unsigned Major = Record[Idx++];
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 88b3e649a5d466..a52d59c61c4ce6 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -878,7 +878,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(MODULE_NAME);
RECORD(MODULE_DIRECTORY);
RECORD(MODULE_MAP_FILE);
- RECORD(IMPORTS);
+ RECORD(IMPORT);
RECORD(ORIGINAL_FILE);
RECORD(ORIGINAL_FILE_ID);
RECORD(INPUT_FILE_OFFSETS);
@@ -1536,34 +1536,53 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) {
// Imports
if (Chain) {
- serialization::ModuleManager &Mgr = Chain->getModuleManager();
- Record.clear();
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(IMPORT));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Kind
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ImportLoc
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Module name len
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Standard C++ mod
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File size
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File timestamp
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File name len
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Strings
+ unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
- for (ModuleFile &M : Mgr) {
+ SmallString<128> Blob;
+
+ for (ModuleFile &M : Chain->getModuleManager()) {
// Skip modules that weren't directly imported.
if (!M.isDirectlyImported())
continue;
+ Record.clear();
+ Blob.clear();
+
+ Record.push_back(IMPORT);
Record.push_back((unsigned)M.Kind); // FIXME: Stable encoding
- Record.push_back(M.StandardCXXModule);
AddSourceLocation(M.ImportLoc, Record);
+ AddStringBlob(M.ModuleName, Record, Blob);
+ Record.push_back(M.StandardCXXModule);
// We don't want to hard code the information about imported modules
// in the C++20 named modules.
- if (!M.StandardCXXModule) {
+ if (M.StandardCXXModule) {
+ Record.push_back(0);
+ Record.push_back(0);
+ Record.push_back(0);
+ } else {
// If we have calculated signature, there is no need to store
// the size or timestamp.
Record.push_back(M.Signature ? 0 : M.File.getSize());
Record.push_back(M.Signature ? 0 : getTimestampForOutput(M.File));
- llvm::append_range(Record, M.Signature);
- }
- AddString(M.ModuleName, Record);
+ llvm::append_range(Blob, M.Signature);
- if (!M.StandardCXXModule)
- AddPath(M.FileName, Record);
+ AddPathBlob(M.FileName, Record, Blob);
+ }
+
+ Stream.EmitRecordWithBlob(AbbrevCode, Record, Blob);
}
- Stream.EmitRecord(IMPORTS, Record);
}
// Write the options block.
@@ -4777,6 +4796,12 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
Record.insert(Record.end(), Str.begin(), Str.end());
}
+void ASTWriter::AddStringBlob(StringRef Str, RecordDataImpl &Record,
+ SmallVectorImpl<char> &Blob) {
+ Record.push_back(Str.size());
+ Blob.insert(Blob.end(), Str.begin(), Str.end());
+}
+
bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
assert(WritingAST && "can't prepare path for output when not writing AST");
@@ -4805,6 +4830,13 @@ void ASTWriter::AddPath(StringRef Path, RecordDataImpl &Record) {
AddString(FilePath, Record);
}
+void ASTWriter::AddPathBlob(StringRef Path, RecordDataImpl &Record,
+ SmallVectorImpl<char> &Blob) {
+ SmallString<128> FilePath(Path);
+ PreparePathForOutput(FilePath);
+ AddStringBlob(FilePath, Record, Blob);
+}
+
void ASTWriter::EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
StringRef Path) {
SmallString<128> FilePath(Path);
diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp
index 9c48712a0b3fbe..4b920fccecac3d 100644
--- a/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -614,62 +614,58 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(FileEntryRef File) {
unsigned Code = MaybeCode.get();
// Handle module dependencies.
- if (State == ControlBlock && Code == IMPORTS) {
- // Load each of the imported PCH files.
- unsigned Idx = 0, N = Record.size();
- while (Idx < N) {
- // Read information about the AST file.
-
- // Skip the imported kind
- ++Idx;
-
- // Skip if it is standard C++ module
- ++Idx;
-
- // Skip the import location
- ++Idx;
-
- // Load stored size/modification time.
- off_t StoredSize = (off_t)Record[Idx++];
- time_t StoredModTime = (time_t)Record[Idx++];
-
- // Skip the stored signature.
- // FIXME: we could read the signature out of the import and validate it.
- auto FirstSignatureByte = Record.begin() + Idx;
- ASTFileSignature StoredSignature = ASTFileSignature::create(
- FirstSignatureByte, FirstSignatureByte...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116095
More information about the cfe-commits
mailing list