[clang-tools-extra] [clangd] [Modules] Support Reusable Modules Builder (PR #106683)
kadir çetinkaya via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 8 05:46:22 PST 2024
================
@@ -316,36 +309,221 @@ llvm::Error buildModuleFile(llvm::StringRef ModuleName,
if (Clang->getDiagnostics().hasErrorOccurred())
return llvm::createStringError("Compilation failed");
- BuiltModuleFiles.addModuleFile(ModuleName, Inputs.CompileCommand.Output);
- return llvm::Error::success();
+ return ModuleFile{ModuleName, Inputs.CompileCommand.Output, BuiltModuleFiles};
+}
+
+bool ReusablePrerequisiteModules::canReuse( // True when nothing is required; otherwise whatever IsModuleFilesUpToDate reports.
+ const CompilerInvocation &CI, // NOTE(review): CI is not referenced in this body.
+ llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
+ if (RequiredModules.empty())
+ return true; // No required module files, so there is nothing to validate.
+ // Gather the path of every required module file and validate them in one call.
+ SmallVector<StringRef> BMIPaths;
+ for (auto &MF : RequiredModules)
+ BMIPaths.push_back(MF->getModuleFilePath());
+ return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
+}
+
+class ModuleFileCache { // Mutex-guarded map from module name to a weakly referenced ModuleFile.
+public:
+ ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
+ // NOTE(review): no out-of-line definition of the member below appears in the visible part of this patch.
+ llvm::Error
+ getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
+ ProjectModules &MDB,
+ ReusablePrerequisiteModules &RequiredModules);
+ const GlobalCompilationDatabase &getCDB() const { return CDB; }
+ // Returns the cached file for ModuleName, or nullptr if it or any module it requires is missing, expired or not up to date.
+ std::shared_ptr<const ModuleFile>
+ getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
+ const ThreadsafeFS &TFS,
+ PrerequisiteModules &BuiltModuleFiles);
+ // Inserts or replaces the entry for ModuleName. Only a weak_ptr is stored, so the cache itself does not keep the file alive.
+ void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
+ std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+
+ ModuleFiles.insert_or_assign(ModuleName, ModuleFile);
+ }
+
+private:
+ const GlobalCompilationDatabase &CDB;
+ // Keyed by module name; expired entries are erased lazily when getValidModuleFile encounters them.
+ llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
+ // Mutex to guard accesses to ModuleFiles.
+ std::mutex ModuleFilesMutex;
+};
+
+/// Collect the names of the modules directly and indirectly required by
+/// \p ModuleName, in breadth-first order and without duplicates.
+/// \p ModuleName itself is guaranteed to be the first element of the result.
+llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
+ StringRef ModuleName) {
+ llvm::SmallVector<StringRef> ModuleNames;
+
+ std::queue<StringRef> Worklist;
+ llvm::StringSet<> ModuleNamesSet;
+ Worklist.push(ModuleName);
+
+ while (!Worklist.empty()) {
+ StringRef CurrentModule = Worklist.front();
+ Worklist.pop();
+ // A name can be queued more than once; emit it only the first time it is dequeued.
+ if (!ModuleNamesSet.insert(CurrentModule).second)
+ continue;
+
+ ModuleNames.push_back(CurrentModule);
+ // NOTE(review): the queued StringRefs view whatever MDB.getRequiredModules returns; if that is a temporary container of owned strings they dangle after this loop - confirm.
+ for (StringRef RequiredModuleName :
+ MDB.getRequiredModules(MDB.getSourceForModuleName(CurrentModule)))
+ if (!ModuleNamesSet.contains(RequiredModuleName))
+ Worklist.push(RequiredModuleName);
+ }
+
+ return ModuleNames;
+}
+
+std::shared_ptr<const ModuleFile>
+ModuleFileCache::getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
+ const ThreadsafeFS &TFS,
+ PrerequisiteModules &BuiltModuleFiles) {
+ { // Early exit: skip the dependency walk when ModuleName itself is absent or expired.
+ std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+
+ auto Iter = ModuleFiles.find(ModuleName);
+ if (Iter == ModuleFiles.end())
+ return nullptr;
+
+ if (Iter->second.expired()) {
+ ModuleFiles.erase(Iter);
+ return nullptr;
+ }
+ }
+ // The dependency closure is computed without holding ModuleFilesMutex.
+ llvm::SmallVector<StringRef> ModuleNames =
+ getAllRequiredModules(MDB, ModuleName);
+
+ llvm::SmallVector<std::shared_ptr<const ModuleFile>> RequiredModuleFiles;
+
+ {
+ std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+ // NOTE(review): the loop variable below shadows the ModuleName parameter.
+ for (StringRef ModuleName : ModuleNames) {
+ auto Iter = ModuleFiles.find(ModuleName);
+ if (Iter == ModuleFiles.end())
+ return nullptr;
+
+ if (Iter->second.expired()) {
+ ModuleFiles.erase(Iter);
+ return nullptr;
+ }
+ // NOTE(review): nothing visible ties the owners' lifetime to ModuleFilesMutex, so lock() could still return null after the expired() check and be dereferenced below - confirm.
+ RequiredModuleFiles.push_back(Iter->second.lock());
+ }
+ }
+ // ModuleNames always starts with ModuleName, so there is at least one entry and index 0 belongs to the requested module.
+ assert(!RequiredModuleFiles.empty());
+ // A single file that is not up to date anywhere in the closure rejects the cached result.
+ if (llvm::any_of(RequiredModuleFiles,
+ [&](std::shared_ptr<const ModuleFile> MF) {
+ return !IsModuleFileUpToDate(MF->getModuleFilePath(),
+ BuiltModuleFiles,
+ TFS.view(std::nullopt));
+ }))
+ return nullptr;
+
+ return RequiredModuleFiles[0];
}
} // namespace
+class ModulesBuilder::ModulesBuilderImpl { // Holds the ModuleFileCache and exposes the compilation database stored in it.
+public:
+ ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
+ // Forwards to the cache, which stores the reference passed to the constructor.
+ const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
+ // Declared here and defined out of line; an llvm::Error return carries any failure to the caller.
+ llvm::Error
+ getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
+ ProjectModules &MDB,
+ ReusablePrerequisiteModules &BuiltModuleFiles);
+
+private:
+ ModuleFileCache Cache;
+};
+
+llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
+ StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
+ ReusablePrerequisiteModules &BuiltModuleFiles) {
+ if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
+ return llvm::Error::success();
+
+ PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
+ /// It is possible that we're meeting third party modules (modules whose
+ /// source are not in the project. e.g, the std module may be a third-party
+ /// module for most project) or something wrong with the implementation of
+ /// ProjectModules.
+ /// FIXME: How should we treat third party modules here? If we want to ignore
+ /// third party modules, we should return true instead of false here.
+ /// Currently we simply bail out.
+ if (ModuleUnitFileName.empty())
+ return llvm::createStringError(
+ llvm::formatv("Don't get the module unit for module {0}", ModuleName));
+
+ for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName))
+ // Return early if there are errors building the module file.
+ if (!getOrBuildModuleFile(RequiredModuleName, TFS, MDB, BuiltModuleFiles))
+ return llvm::createStringError(
+ llvm::formatv("Failed to build module {0}", RequiredModuleName));
+
+ if (std::shared_ptr<const ModuleFile> Cached =
+ Cache.getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
----------------
kadircet wrote:
The loop and recursion above already ensure that we have built all the required modules for `ModuleName`, but inside this function we perform another freshness check of all the required modules. This is really unfortunate, both for making progress (e.g. if a dependent module is modified while we're building an outer module, we'll fail and restart from scratch) and for performance (for every module we want to build, we keep traversing all of its dependencies, making the runtime quadratic in the number of dependent modules).
What about performing something like:
```cpp
llvm::Expected<ReusablePrerequisiteModules> ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB) {
ReusablePrerequisiteModules BuiltModuleFiles;
auto ReqModules = getAllRequiredModules(MDB, ModuleName); // This returns modules in topological order.
for (auto ModName : ReqModules) {
if (auto Cached = Cache.getModule(ModName)) { // Just gets the module if it exists, no checks for freshness.
if (IsModuleFileUpToDate(Cached, BuiltModuleFiles, ...) {
BuiltModuleFiles.add(std::move(Cached));
continue;
}
// cache is stale, clean it up.
Cache.remove(ModuleName);
}
auto BuiltMF = buildModuleFile(...., BuiltModuleFiles);
if (!BuiltMF) {
return Err;
}
BuiltModuleFiles.add(BuiltMF);
Cache.add(BuiltMF);
}
return BuiltModuleFiles;
}
```
https://github.com/llvm/llvm-project/pull/106683
More information about the cfe-commits
mailing list