[clang] [clang][modules] Virtualize module cache pruning (PR #149113)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 16 08:08:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules
Author: Jan Svoboda (jansvoboda11)
<details>
<summary>Changes</summary>
This PR virtualizes module cache pruning via the new `ModuleCache` interface. Currently this is NFC (no functional change), but I left a FIXME in `InProcessModuleCache` to make this more efficient for the dependency scanner.
---
Full diff: https://github.com/llvm/llvm-project/pull/149113.diff
4 Files Affected:
- (modified) clang/include/clang/Serialization/ModuleCache.h (+8-1)
- (modified) clang/lib/Frontend/CompilerInstance.cpp (+4-89)
- (modified) clang/lib/Serialization/ModuleCache.cpp (+86)
- (modified) clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp (+7)
``````````diff
diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h
index 3117d954a09cc..ec052c5c18e0a 100644
--- a/clang/include/clang/Serialization/ModuleCache.h
+++ b/clang/include/clang/Serialization/ModuleCache.h
@@ -45,11 +45,15 @@ class ModuleCache : public RefCountedBase<ModuleCache> {
/// were validated.
virtual void updateModuleTimestamp(StringRef ModuleFilename) = 0;
+ /// Prune module files that haven't been accessed in a long time.
+ virtual void maybePrune(StringRef Path, time_t PruneInterval,
+ time_t PruneAfter) = 0;
+
/// Returns this process's view of the module cache.
virtual InMemoryModuleCache &getInMemoryModuleCache() = 0;
virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0;
- // TODO: Virtualize writing/reading PCM files, pruning, etc.
+ // TODO: Virtualize writing/reading PCM files, etc.
virtual ~ModuleCache() = default;
};
@@ -59,6 +63,9 @@ class ModuleCache : public RefCountedBase<ModuleCache> {
/// \c CompilerInstance instances participating in building modules for single
/// translation unit in order to share the same \c InMemoryModuleCache.
IntrusiveRefCntPtr<ModuleCache> createCrossProcessModuleCache();
+
+/// Shared implementation of `ModuleCache::maybePrune()`.
+void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter);
} // namespace clang
#endif
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index c7b82db292a14..2f5cd84bb53e0 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1636,90 +1636,6 @@ static void checkConfigMacros(Preprocessor &PP, Module *M,
}
}
-/// Write a new timestamp file with the given path.
-static void writeTimestampFile(StringRef TimestampFile) {
- std::error_code EC;
- llvm::raw_fd_ostream Out(TimestampFile.str(), EC, llvm::sys::fs::OF_None);
-}
-
-/// Prune the module cache of modules that haven't been accessed in
-/// a long time.
-static void pruneModuleCache(const HeaderSearchOptions &HSOpts) {
- llvm::sys::fs::file_status StatBuf;
- llvm::SmallString<128> TimestampFile;
- TimestampFile = HSOpts.ModuleCachePath;
- assert(!TimestampFile.empty());
- llvm::sys::path::append(TimestampFile, "modules.timestamp");
-
- // Try to stat() the timestamp file.
- if (std::error_code EC = llvm::sys::fs::status(TimestampFile, StatBuf)) {
- // If the timestamp file wasn't there, create one now.
- if (EC == std::errc::no_such_file_or_directory) {
- writeTimestampFile(TimestampFile);
- }
- return;
- }
-
- // Check whether the time stamp is older than our pruning interval.
- // If not, do nothing.
- time_t TimeStampModTime =
- llvm::sys::toTimeT(StatBuf.getLastModificationTime());
- time_t CurrentTime = time(nullptr);
- if (CurrentTime - TimeStampModTime <= time_t(HSOpts.ModuleCachePruneInterval))
- return;
-
- // Write a new timestamp file so that nobody else attempts to prune.
- // There is a benign race condition here, if two Clang instances happen to
- // notice at the same time that the timestamp is out-of-date.
- writeTimestampFile(TimestampFile);
-
- // Walk the entire module cache, looking for unused module files and module
- // indices.
- std::error_code EC;
- for (llvm::sys::fs::directory_iterator Dir(HSOpts.ModuleCachePath, EC),
- DirEnd;
- Dir != DirEnd && !EC; Dir.increment(EC)) {
- // If we don't have a directory, there's nothing to look into.
- if (!llvm::sys::fs::is_directory(Dir->path()))
- continue;
-
- // Walk all of the files within this directory.
- for (llvm::sys::fs::directory_iterator File(Dir->path(), EC), FileEnd;
- File != FileEnd && !EC; File.increment(EC)) {
- // We only care about module and global module index files.
- StringRef Extension = llvm::sys::path::extension(File->path());
- if (Extension != ".pcm" && Extension != ".timestamp" &&
- llvm::sys::path::filename(File->path()) != "modules.idx")
- continue;
-
- // Look at this file. If we can't stat it, there's nothing interesting
- // there.
- if (llvm::sys::fs::status(File->path(), StatBuf))
- continue;
-
- // If the file has been used recently enough, leave it there.
- time_t FileAccessTime = llvm::sys::toTimeT(StatBuf.getLastAccessedTime());
- if (CurrentTime - FileAccessTime <=
- time_t(HSOpts.ModuleCachePruneAfter)) {
- continue;
- }
-
- // Remove the file.
- llvm::sys::fs::remove(File->path());
-
- // Remove the timestamp file.
- std::string TimpestampFilename = File->path() + ".timestamp";
- llvm::sys::fs::remove(TimpestampFilename);
- }
-
- // If we removed all of the files in the directory, remove the directory
- // itself.
- if (llvm::sys::fs::directory_iterator(Dir->path(), EC) ==
- llvm::sys::fs::directory_iterator() && !EC)
- llvm::sys::fs::remove(Dir->path());
- }
-}
-
void CompilerInstance::createASTReader() {
if (TheASTReader)
return;
@@ -1730,11 +1646,10 @@ void CompilerInstance::createASTReader() {
// If we're implicitly building modules but not currently recursively
// building a module, check whether we need to prune the module cache.
if (getSourceManager().getModuleBuildStack().empty() &&
- !getPreprocessor().getHeaderSearchInfo().getModuleCachePath().empty() &&
- getHeaderSearchOpts().ModuleCachePruneInterval > 0 &&
- getHeaderSearchOpts().ModuleCachePruneAfter > 0) {
- pruneModuleCache(getHeaderSearchOpts());
- }
+ !getPreprocessor().getHeaderSearchInfo().getModuleCachePath().empty())
+ ModCache->maybePrune(getHeaderSearchOpts().ModuleCachePath,
+ getHeaderSearchOpts().ModuleCachePruneInterval,
+ getHeaderSearchOpts().ModuleCachePruneAfter);
HeaderSearchOptions &HSOpts = getHeaderSearchOpts();
std::string Sysroot = HSOpts.Sysroot;
diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp
index f42bdc16d815d..96687277ebafd 100644
--- a/clang/lib/Serialization/ModuleCache.cpp
+++ b/clang/lib/Serialization/ModuleCache.cpp
@@ -16,6 +16,87 @@
using namespace clang;
+/// Write a new timestamp file with the given path.
+static void writeTimestampFile(StringRef TimestampFile) {
+ std::error_code EC;
+ llvm::raw_fd_ostream Out(TimestampFile.str(), EC, llvm::sys::fs::OF_None);
+}
+
+void clang::maybePruneImpl(StringRef Path, time_t PruneInterval,
+ time_t PruneAfter) {
+ if (PruneInterval <= 0 || PruneAfter <= 0)
+ return;
+
+ llvm::SmallString<128> TimestampFile(Path);
+ llvm::sys::path::append(TimestampFile, "modules.timestamp");
+
+ // Try to stat() the timestamp file.
+ llvm::sys::fs::file_status StatBuf;
+ if (std::error_code EC = llvm::sys::fs::status(TimestampFile, StatBuf)) {
+ // If the timestamp file wasn't there, create one now.
+ if (EC == std::errc::no_such_file_or_directory)
+ writeTimestampFile(TimestampFile);
+ return;
+ }
+
+ // Check whether the time stamp is older than our pruning interval.
+ // If not, do nothing.
+ time_t TimestampModTime =
+ llvm::sys::toTimeT(StatBuf.getLastModificationTime());
+ time_t CurrentTime = time(nullptr);
+ if (CurrentTime - TimestampModTime <= PruneInterval)
+ return;
+
+ // Write a new timestamp file so that nobody else attempts to prune.
+ // There is a benign race condition here, if two Clang instances happen to
+ // notice at the same time that the timestamp is out-of-date.
+ writeTimestampFile(TimestampFile);
+
+ // Walk the entire module cache, looking for unused module files and module
+ // indices.
+ std::error_code EC;
+ for (llvm::sys::fs::directory_iterator Dir(Path, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ // If we don't have a directory, there's nothing to look into.
+ if (!llvm::sys::fs::is_directory(Dir->path()))
+ continue;
+
+ // Walk all the files within this directory.
+ for (llvm::sys::fs::directory_iterator File(Dir->path(), EC), FileEnd;
+ File != FileEnd && !EC; File.increment(EC)) {
+ // We only care about module and global module index files.
+ StringRef Extension = llvm::sys::path::extension(File->path());
+ if (Extension != ".pcm" && Extension != ".timestamp" &&
+ llvm::sys::path::filename(File->path()) != "modules.idx")
+ continue;
+
+ // Look at this file. If we can't stat it, there's nothing interesting
+ // there.
+ if (llvm::sys::fs::status(File->path(), StatBuf))
+ continue;
+
+ // If the file has been used recently enough, leave it there.
+ time_t FileAccessTime = llvm::sys::toTimeT(StatBuf.getLastAccessedTime());
+ if (CurrentTime - FileAccessTime <= PruneAfter)
+ continue;
+
+ // Remove the file.
+ llvm::sys::fs::remove(File->path());
+
+ // Remove the timestamp file.
+ std::string TimpestampFilename = File->path() + ".timestamp";
+ llvm::sys::fs::remove(TimpestampFilename);
+ }
+
+ // If we removed all the files in the directory, remove the directory
+ // itself.
+ if (llvm::sys::fs::directory_iterator(Dir->path(), EC) ==
+ llvm::sys::fs::directory_iterator() &&
+ !EC)
+ llvm::sys::fs::remove(Dir->path());
+ }
+}
+
namespace {
class CrossProcessModuleCache : public ModuleCache {
InMemoryModuleCache InMemory;
@@ -53,6 +134,11 @@ class CrossProcessModuleCache : public ModuleCache {
OS.clear_error(); // Avoid triggering a fatal error.
}
+ void maybePrune(StringRef Path, time_t PruneInterval,
+ time_t PruneAfter) override {
+ maybePruneImpl(Path, PruneInterval, PruneAfter);
+ }
+
InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
diff --git a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
index 80db2d47d940e..d1e543b438225 100644
--- a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
+++ b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
@@ -100,6 +100,13 @@ class InProcessModuleCache : public ModuleCache {
Timestamp.store(llvm::sys::toTimeT(std::chrono::system_clock::now()));
}
+ void maybePrune(StringRef Path, time_t PruneInterval,
+ time_t PruneAfter) override {
+ // FIXME: This only needs to be ran once per build, not in every
+ // compilation. Call it once per service.
+ maybePruneImpl(Path, PruneInterval, PruneAfter);
+ }
+
InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
``````````
</details>
https://github.com/llvm/llvm-project/pull/149113
More information about the cfe-commits mailing list