[clang] a553c62 - [clang][modules] Avoid allocations when reading blob paths (#113984)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 31 10:18:24 PDT 2024
Author: Jan Svoboda
Date: 2024-10-31T10:18:21-07:00
New Revision: a553c620b7d70dedd268aa2588e5e50e7dc6ccc8
URL: https://github.com/llvm/llvm-project/commit/a553c620b7d70dedd268aa2588e5e50e7dc6ccc8
DIFF: https://github.com/llvm/llvm-project/commit/a553c620b7d70dedd268aa2588e5e50e7dc6ccc8.diff
LOG: [clang][modules] Avoid allocations when reading blob paths (#113984)
When reading a path from a bitstream blob, `ASTReader` performs up to
three allocations:
1. Conversion of the `StringRef` blob into `std::string` to conform to
the `ResolveImportedPath()` API that takes `std::string &`.
2. Concatenation of the module file prefix directory and the relative
path into a fresh `SmallString<128>` buffer in `ResolveImportedPath()`.
3. Propagating the result out of `ResolveImportedPath()` by calling
`std::string::assign()` on the out-parameter.
This patch makes is so that we avoid allocations altogether (amortized)
by:
1. Avoiding conversion of the `StringRef` blob into `std::string` and
changing the `ResolveImportedPath()` API.
2. Using one "global" buffer to hold the concatenation.
3. Returning `StringRef` that points into the buffer and ensuring the
contents are not overwritten while it lives.
Note that in some places of the bitstream we don't store paths as blobs,
but rather as records that get VBR-encoded. This makes the allocation in
(1) unavoidable. I plan to fix this in a follow-up PR by changing the
PCM format.
Moreover, there are some data structures (e.g.
`serialization::InputFileInfo`) that store deserialized and resolved
paths as `std::string`. If we don't access them frequently, it would be
more efficient to store just the unresolved `StringRef` and resolve them
on demand (within some kind of shared buffer to prevent allocations).
This PR alone improves `clang-scan-deps` performance on my workload by
3.6%.
Added:
Modified:
clang/include/clang/Serialization/ASTReader.h
clang/lib/Serialization/ASTReader.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 070c1c9a54f48c..9c274adc59a207 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -50,6 +50,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
@@ -1341,9 +1342,48 @@ class ASTReader
serialization::InputFile getInputFile(ModuleFile &F, unsigned ID,
bool Complain = true);
+ /// The buffer used as the temporary backing storage for resolved paths.
+ SmallString<0> PathBuf;
+
+ /// A wrapper around StringRef that temporarily borrows the underlying buffer.
+ class TemporarilyOwnedStringRef {
+ StringRef String;
+ llvm::SaveAndRestore<SmallString<0>> UnderlyingBuffer;
+
+ public:
+ TemporarilyOwnedStringRef(StringRef S, SmallString<0> &UnderlyingBuffer)
+ : String(S), UnderlyingBuffer(UnderlyingBuffer, {}) {}
+
+ /// Return the wrapped \c StringRef that must be outlived by \c this.
+ const StringRef *operator->() const & { return &String; }
+ const StringRef &operator*() const & { return String; }
+
+ /// Make it harder to get a \c StringRef that outlives \c this.
+ const StringRef *operator->() && = delete;
+ const StringRef &operator*() && = delete;
+ };
+
public:
- void ResolveImportedPath(ModuleFile &M, std::string &Filename);
- static void ResolveImportedPath(std::string &Filename, StringRef Prefix);
+ /// Get the buffer for resolving paths.
+ SmallString<0> &getPathBuf() { return PathBuf; }
+
+ /// Resolve \c Path in the context of module file \c M. The return value
+ /// must go out of scope before the next call to \c ResolveImportedPath.
+ static TemporarilyOwnedStringRef
+ ResolveImportedPath(SmallString<0> &Buf, StringRef Path, ModuleFile &ModF);
+ /// Resolve \c Path in the context of the \c Prefix directory. The return
+ /// value must go out of scope before the next call to \c ResolveImportedPath.
+ static TemporarilyOwnedStringRef
+ ResolveImportedPath(SmallString<0> &Buf, StringRef Path, StringRef Prefix);
+
+ /// Resolve \c Path in the context of module file \c M.
+ static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
+ StringRef Path,
+ ModuleFile &ModF);
+ /// Resolve \c Path in the context of the \c Prefix directory.
+ static std::string ResolveImportedPathAndAllocate(SmallString<0> &Buf,
+ StringRef Path,
+ StringRef Prefix);
/// Returns the first key declaration for the given declaration. This
/// is one that is formerly-canonical (or still canonical) and whose module
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 692fb8a91840c0..d383b4f29a5264 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2048,9 +2048,9 @@ HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
if (!Key.Imported)
return FileMgr.getOptionalFileRef(Key.Filename);
- std::string Resolved = std::string(Key.Filename);
- Reader.ResolveImportedPath(M, Resolved);
- return FileMgr.getOptionalFileRef(Resolved);
+ auto Resolved =
+ ASTReader::ResolveImportedPath(Reader.getPathBuf(), Key.Filename, M);
+ return FileMgr.getOptionalFileRef(*Resolved);
}
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -2513,11 +2513,12 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
uint16_t AsRequestedLength = Record[7];
- std::string NameAsRequested = Blob.substr(0, AsRequestedLength).str();
- std::string Name = Blob.substr(AsRequestedLength).str();
+ StringRef NameAsRequestedRef = Blob.substr(0, AsRequestedLength);
+ StringRef NameRef = Blob.substr(AsRequestedLength);
- ResolveImportedPath(F, NameAsRequested);
- ResolveImportedPath(F, Name);
+ std::string NameAsRequested =
+ ResolveImportedPathAndAllocate(PathBuf, NameAsRequestedRef, F);
+ std::string Name = ResolveImportedPathAndAllocate(PathBuf, NameRef, F);
if (Name.empty())
Name = NameAsRequested;
@@ -2743,23 +2744,38 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
return IF;
}
-/// If we are loading a relocatable PCH or module file, and the filename
-/// is not an absolute path, add the system or module root to the beginning of
-/// the file name.
-void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
- // Resolve relative to the base directory, if we have one.
- if (!M.BaseDirectory.empty())
- return ResolveImportedPath(Filename, M.BaseDirectory);
+ASTReader::TemporarilyOwnedStringRef
+ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
+ ModuleFile &ModF) {
+ return ResolveImportedPath(Buf, Path, ModF.BaseDirectory);
}
-void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
- if (Filename.empty() || llvm::sys::path::is_absolute(Filename) ||
- Filename == "<built-in>" || Filename == "<command line>")
- return;
+ASTReader::TemporarilyOwnedStringRef
+ASTReader::ResolveImportedPath(SmallString<0> &Buf, StringRef Path,
+ StringRef Prefix) {
+ assert(Buf.capacity() != 0 && "Overlapping ResolveImportedPath calls");
+
+ if (Prefix.empty() || Path.empty() || llvm::sys::path::is_absolute(Path) ||
+ Path == "<built-in>" || Path == "<command line>")
+ return {Path, Buf};
+
+ Buf.clear();
+ llvm::sys::path::append(Buf, Prefix, Path);
+ StringRef ResolvedPath{Buf.data(), Buf.size()};
+ return {ResolvedPath, Buf};
+}
- SmallString<128> Buffer;
- llvm::sys::path::append(Buffer, Prefix, Filename);
- Filename.assign(Buffer.begin(), Buffer.end());
+std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
+ StringRef P,
+ ModuleFile &ModF) {
+ return ResolveImportedPathAndAllocate(Buf, P, ModF.BaseDirectory);
+}
+
+std::string ASTReader::ResolveImportedPathAndAllocate(SmallString<0> &Buf,
+ StringRef P,
+ StringRef Prefix) {
+ auto ResolvedPath = ResolveImportedPath(Buf, P, Prefix);
+ return ResolvedPath->str();
}
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
@@ -3187,8 +3203,8 @@ ASTReader::ReadControlBlock(ModuleFile &F,
case ORIGINAL_FILE:
F.OriginalSourceFileID = FileID::get(Record[0]);
F.ActualOriginalSourceFileName = std::string(Blob);
- F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
- ResolveImportedPath(F, F.OriginalSourceFileName);
+ F.OriginalSourceFileName = ResolveImportedPathAndAllocate(
+ PathBuf, F.ActualOriginalSourceFileName, F);
break;
case ORIGINAL_FILE_ID:
@@ -5477,6 +5493,8 @@ bool ASTReader::readASTFileControlBlock(
RecordData Record;
std::string ModuleDir;
bool DoneWithControlBlock = false;
+ SmallString<0> PathBuf;
+ PathBuf.reserve(256);
while (!DoneWithControlBlock) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
@@ -5559,9 +5577,9 @@ bool ASTReader::readASTFileControlBlock(
break;
case MODULE_MAP_FILE: {
unsigned Idx = 0;
- auto Path = ReadString(Record, Idx);
- ResolveImportedPath(Path, ModuleDir);
- Listener.ReadModuleMapFile(Path);
+ std::string PathStr = ReadString(Record, Idx);
+ auto Path = ResolveImportedPath(PathBuf, PathStr, ModuleDir);
+ Listener.ReadModuleMapFile(*Path);
break;
}
case INPUT_FILE_OFFSETS: {
@@ -5608,10 +5626,9 @@ bool ASTReader::readASTFileControlBlock(
break;
case INPUT_FILE:
bool Overridden = static_cast<bool>(Record[3]);
- std::string Filename = std::string(Blob);
- ResolveImportedPath(Filename, ModuleDir);
+ auto Filename = ResolveImportedPath(PathBuf, Blob, ModuleDir);
shouldContinue = Listener.visitInputFile(
- Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
+ *Filename, isSystemFile, Overridden, /*IsExplicitModule=*/false);
break;
}
if (!shouldContinue)
@@ -5646,9 +5663,9 @@ bool ASTReader::readASTFileControlBlock(
// Skip Size, ModTime and Signature
Idx += 1 + 1 + ASTFileSignature::size;
std::string ModuleName = ReadString(Record, Idx);
- std::string Filename = ReadString(Record, Idx);
- ResolveImportedPath(Filename, ModuleDir);
- Listener.visitImport(ModuleName, Filename);
+ std::string FilenameStr = ReadString(Record, Idx);
+ auto Filename = ResolveImportedPath(PathBuf, FilenameStr, ModuleDir);
+ Listener.visitImport(ModuleName, *Filename);
}
break;
}
@@ -5901,9 +5918,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
// FIXME: This doesn't work for framework modules as `Filename` is the
// name as written in the module file and does not include
// `Headers/`, so this path will never exist.
- std::string Filename = std::string(Blob);
- ResolveImportedPath(F, Filename);
- if (auto Umbrella = PP.getFileManager().getOptionalFileRef(Filename)) {
+ auto Filename = ResolveImportedPath(PathBuf, Blob, F);
+ if (auto Umbrella = PP.getFileManager().getOptionalFileRef(*Filename)) {
if (!CurrentModule->getUmbrellaHeaderAsWritten()) {
// FIXME: NameAsWritten
ModMap.setUmbrellaHeaderAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -5931,18 +5947,16 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
break;
case SUBMODULE_TOPHEADER: {
- std::string HeaderName(Blob);
- ResolveImportedPath(F, HeaderName);
- CurrentModule->addTopHeaderFilename(HeaderName);
+ auto HeaderName = ResolveImportedPath(PathBuf, Blob, F);
+ CurrentModule->addTopHeaderFilename(*HeaderName);
break;
}
case SUBMODULE_UMBRELLA_DIR: {
// See comments in SUBMODULE_UMBRELLA_HEADER
- std::string Dirname = std::string(Blob);
- ResolveImportedPath(F, Dirname);
+ auto Dirname = ResolveImportedPath(PathBuf, Blob, F);
if (auto Umbrella =
- PP.getFileManager().getOptionalDirectoryRef(Dirname)) {
+ PP.getFileManager().getOptionalDirectoryRef(*Dirname)) {
if (!CurrentModule->getUmbrellaDirAsWritten()) {
// FIXME: NameAsWritten
ModMap.setUmbrellaDirAsWritten(CurrentModule, *Umbrella, Blob, "");
@@ -9597,17 +9611,13 @@ std::string ASTReader::ReadString(const RecordDataImpl &Record, unsigned &Idx) {
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
- std::string Filename = ReadString(Record, Idx);
- ResolveImportedPath(F, Filename);
- return Filename;
+ return ReadPath(F.BaseDirectory, Record, Idx);
}
std::string ASTReader::ReadPath(StringRef BaseDirectory,
const RecordData &Record, unsigned &Idx) {
std::string Filename = ReadString(Record, Idx);
- if (!BaseDirectory.empty())
- ResolveImportedPath(Filename, BaseDirectory);
- return Filename;
+ return ResolveImportedPathAndAllocate(PathBuf, Filename, BaseDirectory);
}
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
@@ -10512,6 +10522,8 @@ ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
SourceMgr.setExternalSLocEntrySource(this);
+ PathBuf.reserve(256);
+
for (const auto &Ext : Extensions) {
auto BlockName = Ext->getExtensionMetadata().BlockName;
auto Known = ModuleFileExtensions.find(BlockName);
More information about the cfe-commits
mailing list