[clang-tools-extra] [clangd] [modules] Avoid building duplicated modules at the same time (PR #114365)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 30 22:58:32 PDT 2024


https://github.com/ChuanqiXu9 created https://github.com/llvm/llvm-project/pull/114365

Following of https://github.com/llvm/llvm-project/pull/106683

In that, if we open multiple tabs importing the same module, we're going to build the same in the corresponding threads. This is not good. This patch tries to optimize such cases. 

Due that PR doesn't start with a branch in llvm's repo, so this one can't be simply based on that. So we have to wait for that one landed first.

>From 15aa6af1f5d22e4b837e8e2fd49469310ffbe7f1 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 30 Aug 2024 15:11:07 +0800
Subject: [PATCH 1/4] [clangd] [Modules] Support Reusable Modules Builder

---
 clang-tools-extra/clangd/ModulesBuilder.cpp   | 375 ++++++++++++++----
 clang-tools-extra/clangd/ModulesBuilder.h     |   8 +-
 .../unittests/PrerequisiteModulesTest.cpp     |  36 ++
 3 files changed, 344 insertions(+), 75 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 97f67ddf5495a2..c4fb3d4010d370 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -13,6 +13,7 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
+#include "llvm/ADT/ScopeExit.h"
 
 namespace clang {
 namespace clangd {
@@ -192,33 +193,27 @@ bool IsModuleFilesUpToDate(
       });
 }
 
-// StandalonePrerequisiteModules - stands for PrerequisiteModules for which all
+// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
 // the required modules are built successfully. All the module files
-// are owned by the StandalonePrerequisiteModules class.
-//
-// Any of the built module files won't be shared with other instances of the
-// class. So that we can avoid worrying thread safety.
-//
-// We don't need to worry about duplicated module names here since the standard
-// guarantees the module names should be unique to a program.
-class StandalonePrerequisiteModules : public PrerequisiteModules {
+// are owned by the modules builder.
+class ReusablePrerequisiteModules : public PrerequisiteModules {
 public:
-  StandalonePrerequisiteModules() = default;
+  ReusablePrerequisiteModules() = default;
 
-  StandalonePrerequisiteModules(const StandalonePrerequisiteModules &) = delete;
-  StandalonePrerequisiteModules
-  operator=(const StandalonePrerequisiteModules &) = delete;
-  StandalonePrerequisiteModules(StandalonePrerequisiteModules &&) = delete;
-  StandalonePrerequisiteModules
-  operator=(StandalonePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &) = delete;
+  ReusablePrerequisiteModules
+  operator=(const ReusablePrerequisiteModules &) = delete;
+  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules
+  operator=(ReusablePrerequisiteModules &&) = delete;
 
-  ~StandalonePrerequisiteModules() override = default;
+  ~ReusablePrerequisiteModules() override = default;
 
   void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
     // Appending all built module files.
     for (auto &RequiredModule : RequiredModules)
       Options.PrebuiltModuleFiles.insert_or_assign(
-          RequiredModule.ModuleName, RequiredModule.ModuleFilePath);
+          RequiredModule->ModuleName, RequiredModule->ModuleFilePath);
   }
 
   bool canReuse(const CompilerInvocation &CI,
@@ -228,54 +223,30 @@ class StandalonePrerequisiteModules : public PrerequisiteModules {
     return BuiltModuleNames.contains(ModuleName);
   }
 
-  void addModuleFile(llvm::StringRef ModuleName,
-                     llvm::StringRef ModuleFilePath) {
-    RequiredModules.emplace_back(ModuleName, ModuleFilePath);
-    BuiltModuleNames.insert(ModuleName);
+  void addModuleFile(std::shared_ptr<ModuleFile> BMI) {
+    BuiltModuleNames.insert(BMI->ModuleName);
+    RequiredModules.emplace_back(std::move(BMI));
   }
 
 private:
-  llvm::SmallVector<ModuleFile, 8> RequiredModules;
+  mutable llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules;
   // A helper class to speedup the query if a module is built.
   llvm::StringSet<> BuiltModuleNames;
 };
 
-// Build a module file for module with `ModuleName`. The information of built
-// module file are stored in \param BuiltModuleFiles.
-llvm::Error buildModuleFile(llvm::StringRef ModuleName,
-                            const GlobalCompilationDatabase &CDB,
-                            const ThreadsafeFS &TFS, ProjectModules &MDB,
-                            PathRef ModuleFilesPrefix,
-                            StandalonePrerequisiteModules &BuiltModuleFiles) {
-  if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
-    return llvm::Error::success();
-
-  PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
-  // It is possible that we're meeting third party modules (modules whose
-  // source are not in the project. e.g, the std module may be a third-party
-  // module for most projects) or something wrong with the implementation of
-  // ProjectModules.
-  // FIXME: How should we treat third party modules here? If we want to ignore
-  // third party modules, we should return true instead of false here.
-  // Currently we simply bail out.
-  if (ModuleUnitFileName.empty())
-    return llvm::createStringError("Failed to get the primary source");
-
+/// Build a module file for module with `ModuleName`. The information of built
+/// module file are stored in \param BuiltModuleFiles.
+llvm::Expected<ModuleFile>
+buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
+                const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
+                PathRef ModuleFilesPrefix,
+                const ReusablePrerequisiteModules &BuiltModuleFiles) {
   // Try cheap operation earlier to boil-out cheaply if there are problems.
   auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
   if (!Cmd)
     return llvm::createStringError(
         llvm::formatv("No compile command for {0}", ModuleUnitFileName));
 
-  for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName)) {
-    // Return early if there are errors building the module file.
-    if (llvm::Error Err = buildModuleFile(RequiredModuleName, CDB, TFS, MDB,
-                                          ModuleFilesPrefix, BuiltModuleFiles))
-      return llvm::createStringError(
-          llvm::formatv("Failed to build dependency {0}: {1}",
-                        RequiredModuleName, llvm::toString(std::move(Err))));
-  }
-
   Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
 
   ParseInputs Inputs;
@@ -316,15 +287,282 @@ llvm::Error buildModuleFile(llvm::StringRef ModuleName,
   if (Clang->getDiagnostics().hasErrorOccurred())
     return llvm::createStringError("Compilation failed");
 
-  BuiltModuleFiles.addModuleFile(ModuleName, Inputs.CompileCommand.Output);
-  return llvm::Error::success();
+  return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
+}
+
+bool ReusablePrerequisiteModules::canReuse(
+    const CompilerInvocation &CI,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
+  if (RequiredModules.empty())
+    return true;
+
+  SmallVector<StringRef> BMIPaths;
+  for (auto &MF : RequiredModules)
+    BMIPaths.push_back(MF->ModuleFilePath);
+  return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
 }
 } // namespace
 
+class ModulesBuilder::ModuleFileCache {
+public:
+  ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
+
+  llvm::Error
+  getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
+                       ProjectModules &MDB,
+                       ReusablePrerequisiteModules &RequiredModules);
+  const GlobalCompilationDatabase &getCDB() const { return CDB; }
+
+private:
+  std::shared_ptr<ModuleFile>
+  getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
+                     const ThreadsafeFS &TFS,
+                     PrerequisiteModules &BuiltModuleFiles);
+
+  /// This should only be called by getValidModuleFile. This is unlocked version
+  /// of getValidModuleFile. The function is extracted to avoid dead locks when
+  /// recursing.
+  std::shared_ptr<ModuleFile>
+  isValidModuleFileUnlocked(StringRef ModuleName, ProjectModules &MDB,
+                            const ThreadsafeFS &TFS,
+                            PrerequisiteModules &BuiltModuleFiles);
+
+  void startBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.insert(ModuleName);
+  }
+  void endBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.erase(ModuleName);
+  }
+  bool isBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    return BuildingModules.contains(ModuleName);
+  }
+
+  const GlobalCompilationDatabase &CDB;
+
+  llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
+  // Mutex to guard accesses to ModuleFiles.
+  std::mutex ModuleFilesMutex;
+
+  // We should only build a unique module at most at the same time.
+  // When we want to build a module, use the mutex to lock it and use the
+  // condition variable to notify other threads the status of the build results.
+  //
+  // Store the mutex and the condition_variable in shared_ptr since they may be
+  // accessed by many threads.
+  llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes;
+  llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs;
+  // The building modules set. A successed built module or a failed module or
+  // an unbuilt module shouldn't be in this set.
+  // This set is helpful to control the behavior of the condition variables.
+  llvm::StringSet<> BuildingModules;
+  // Lock when we access BuildingModules, BuildingModuleMutexes and
+  // BuildingModuleCVs.
+  std::mutex ModulesBuildingMutex;
+
+  /// An RAII object to guard the process to build a specific module.
+  struct ModuleBuildingSharedOwner {
+  public:
+    ModuleBuildingSharedOwner(StringRef ModuleName,
+                              std::shared_ptr<std::mutex> &Mutex,
+                              std::shared_ptr<std::condition_variable> &CV,
+                              ModuleFileCache &Cache)
+        : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) {
+      IsFirstTask = (Mutex.use_count() == 2);
+    }
+
+    ~ModuleBuildingSharedOwner();
+
+    bool isUniqueBuildingOwner() { return IsFirstTask; }
+
+    std::mutex &getMutex() { return *Mutex; }
+
+    std::condition_variable &getCV() { return *CV; }
+
+  private:
+    StringRef ModuleName;
+    std::shared_ptr<std::mutex> Mutex;
+    std::shared_ptr<std::condition_variable> CV;
+    ModuleFileCache &Cache;
+    bool IsFirstTask;
+  };
+
+  ModuleBuildingSharedOwner
+  getOrCreateModuleBuildingOwner(StringRef ModuleName);
+};
+
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner::
+    ~ModuleBuildingSharedOwner() {
+  std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex);
+
+  Mutex.reset();
+  CV.reset();
+
+  // Try to release the memory in builder if possible.
+  if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName);
+      Iter != Cache.BuildingModuleCVs.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleCVs.erase(Iter);
+
+  if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName);
+      Iter != Cache.BuildingModuleMutexes.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleMutexes.erase(Iter);
+}
+
+std::shared_ptr<ModuleFile>
+ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
+    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
+    PrerequisiteModules &BuiltModuleFiles) {
+  auto Iter = ModuleFiles.find(ModuleName);
+  if (Iter != ModuleFiles.end()) {
+    if (!IsModuleFileUpToDate(Iter->second->ModuleFilePath, BuiltModuleFiles,
+                              TFS.view(std::nullopt))) {
+      log("Found not-up-date module file {0} for module {1} in cache",
+          Iter->second->ModuleFilePath, ModuleName);
+      ModuleFiles.erase(Iter);
+      return nullptr;
+    }
+
+    if (llvm::any_of(
+            MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)),
+            [&MDB, &TFS, &BuiltModuleFiles, this](auto &&RequiredModuleName) {
+              return !isValidModuleFileUnlocked(RequiredModuleName, MDB, TFS,
+                                                BuiltModuleFiles);
+            })) {
+      ModuleFiles.erase(Iter);
+      return nullptr;
+    }
+
+    return Iter->second;
+  }
+
+  log("Don't find {0} in cache", ModuleName);
+
+  return nullptr;
+}
+
+std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
+    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
+    PrerequisiteModules &BuiltModuleFiles) {
+  std::lock_guard<std::mutex> _(ModuleFilesMutex);
+
+  return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
+}
+
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner
+ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner(
+    StringRef ModuleName) {
+  std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+
+  auto MutexIter = BuildingModuleMutexes.find(ModuleName);
+  if (MutexIter == BuildingModuleMutexes.end())
+    MutexIter = BuildingModuleMutexes
+                    .try_emplace(ModuleName, std::make_shared<std::mutex>())
+                    .first;
+
+  auto CVIter = BuildingModuleCVs.find(ModuleName);
+  if (CVIter == BuildingModuleCVs.end())
+    CVIter = BuildingModuleCVs
+                 .try_emplace(ModuleName,
+                              std::make_shared<std::condition_variable>())
+                 .first;
+
+  return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(),
+                                   CVIter->getValue(), *this);
+}
+
+llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
+    StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
+    ReusablePrerequisiteModules &BuiltModuleFiles) {
+  if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
+    return llvm::Error::success();
+
+  PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
+  /// It is possible that we're meeting third party modules (modules whose
+  /// source are not in the project. e.g, the std module may be a third-party
+  /// module for most project) or something wrong with the implementation of
+  /// ProjectModules.
+  /// FIXME: How should we treat third party modules here? If we want to ignore
+  /// third party modules, we should return true instead of false here.
+  /// Currently we simply bail out.
+  if (ModuleUnitFileName.empty())
+    return llvm::createStringError(
+        llvm::formatv("Don't get the module unit for module {0}", ModuleName));
+
+  for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName))
+    // Return early if there are errors building the module file.
+    if (!getOrBuildModuleFile(RequiredModuleName, TFS, MDB, BuiltModuleFiles))
+      return llvm::createStringError(
+          llvm::formatv("Failed to build module {0}", RequiredModuleName));
+
+  if (std::shared_ptr<ModuleFile> Cached =
+          getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
+    log("Reusing module {0} from {1}", ModuleName, Cached->ModuleFilePath);
+    BuiltModuleFiles.addModuleFile(Cached);
+    return llvm::Error::success();
+  }
+
+  ModuleBuildingSharedOwner ModuleBuildingOwner =
+      getOrCreateModuleBuildingOwner(ModuleName);
+
+  std::condition_variable &CV = ModuleBuildingOwner.getCV();
+  std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex());
+  if (!ModuleBuildingOwner.isUniqueBuildingOwner()) {
+    log("Waiting other task for module {0}", ModuleName);
+    CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); });
+
+    // Try to access the built module files from other threads manually.
+    // We don't call getValidModuleFile here since it may be too heavy.
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+    auto Iter = ModuleFiles.find(ModuleName);
+    if (Iter != ModuleFiles.end()) {
+      log("Got module file from other task building {0}", ModuleName);
+      BuiltModuleFiles.addModuleFile(Iter->second);
+      return llvm::Error::success();
+    }
+
+    // If the module file is not in the cache, it indicates that the building
+    // from other thread failed, so we give up earlier in this case to avoid
+    // wasting time.
+    return llvm::createStringError(llvm::formatv(
+        "The module file {0} may be failed to build in other thread.",
+        ModuleName));
+  }
+
+  log("Building module {0}", ModuleName);
+  startBuildingModule(ModuleName);
+
+  auto _ = llvm::make_scope_exit([&]() {
+    endBuildingModule(ModuleName);
+    CV.notify_all();
+  });
+
+  llvm::SmallString<256> ModuleFilesPrefix =
+      getUniqueModuleFilesPath(ModuleUnitFileName);
+
+  llvm::Expected<ModuleFile> MF =
+      buildModuleFile(ModuleName, ModuleUnitFileName, CDB, TFS,
+                      ModuleFilesPrefix, BuiltModuleFiles);
+  if (llvm::Error Err = MF.takeError())
+    return Err;
+
+  log("Built module {0} to {1}", ModuleName, MF->ModuleFilePath);
+
+  std::lock_guard<std::mutex> __(ModuleFilesMutex);
+  auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF));
+  ModuleFiles.insert_or_assign(ModuleName, BuiltModuleFile);
+  BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
+
+  return llvm::Error::success();
+}
 std::unique_ptr<PrerequisiteModules>
 ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
-                                            const ThreadsafeFS &TFS) const {
-  std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File);
+                                            const ThreadsafeFS &TFS) {
+  std::unique_ptr<ProjectModules> MDB =
+      MFCache->getCDB().getProjectModules(File);
   if (!MDB) {
     elog("Failed to get Project Modules information for {0}", File);
     return std::make_unique<FailedPrerequisiteModules>();
@@ -332,20 +570,19 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
 
   std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
   if (RequiredModuleNames.empty())
-    return std::make_unique<StandalonePrerequisiteModules>();
+    return std::make_unique<ReusablePrerequisiteModules>();
 
   llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(File);
 
   log("Trying to build required modules for {0} in {1}", File,
       ModuleFilesPrefix);
 
-  auto RequiredModules = std::make_unique<StandalonePrerequisiteModules>();
+  auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
 
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
-    if (llvm::Error Err =
-            buildModuleFile(RequiredModuleName, CDB, TFS, *MDB.get(),
-                            ModuleFilesPrefix, *RequiredModules.get())) {
+    if (llvm::Error Err = MFCache->getOrBuildModuleFile(
+            RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
       elog("Failed to build module {0}; due to {1}", RequiredModuleName,
            toString(std::move(Err)));
       return std::make_unique<FailedPrerequisiteModules>();
@@ -357,17 +594,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
   return std::move(RequiredModules);
 }
 
-bool StandalonePrerequisiteModules::canReuse(
-    const CompilerInvocation &CI,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
-  if (RequiredModules.empty())
-    return true;
-
-  SmallVector<StringRef> BMIPaths;
-  for (auto &MF : RequiredModules)
-    BMIPaths.push_back(MF.ModuleFilePath);
-  return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
+ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
+  MFCache = std::make_unique<ModuleFileCache>(CDB);
 }
 
+ModulesBuilder::~ModulesBuilder() {}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/ModulesBuilder.h b/clang-tools-extra/clangd/ModulesBuilder.h
index 0514e7486475d0..2aae441747a898 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.h
+++ b/clang-tools-extra/clangd/ModulesBuilder.h
@@ -85,7 +85,8 @@ class PrerequisiteModules {
 /// different versions and different source files.
 class ModulesBuilder {
 public:
-  ModulesBuilder(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
+  ModulesBuilder(const GlobalCompilationDatabase &CDB);
+  ~ModulesBuilder();
 
   ModulesBuilder(const ModulesBuilder &) = delete;
   ModulesBuilder(ModulesBuilder &&) = delete;
@@ -94,10 +95,11 @@ class ModulesBuilder {
   ModulesBuilder &operator=(ModulesBuilder &&) = delete;
 
   std::unique_ptr<PrerequisiteModules>
-  buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) const;
+  buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS);
 
 private:
-  const GlobalCompilationDatabase &CDB;
+  class ModuleFileCache;
+  std::unique_ptr<ModuleFileCache> MFCache;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 691a93e7acd0af..9577935c48b5ec 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -482,6 +482,42 @@ void func() {
   EXPECT_EQ(Result.signatures[0].parameters[0].labelString, "int a");
 }
 
+TEST_F(PrerequisiteModulesTests, ReusablePrerequisiteModulesTest) {
+  MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+export int M = 43;
+  )cpp");
+  CDB.addFile("A.cppm", R"cpp(
+export module A;
+import M;
+export int A = 43 + M;
+  )cpp");
+  CDB.addFile("B.cppm", R"cpp(
+export module B;
+import M;
+export int B = 44 + M;
+  )cpp");
+
+  ModulesBuilder Builder(CDB);
+
+  auto AInfo = Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+  EXPECT_TRUE(AInfo);
+  auto BInfo = Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+  EXPECT_TRUE(BInfo);
+  HeaderSearchOptions HSOptsA(TestDir);
+  HeaderSearchOptions HSOptsB(TestDir);
+  AInfo->adjustHeaderSearchOptions(HSOptsA);
+  BInfo->adjustHeaderSearchOptions(HSOptsB);
+
+  EXPECT_FALSE(HSOptsA.PrebuiltModuleFiles.empty());
+  EXPECT_FALSE(HSOptsB.PrebuiltModuleFiles.empty());
+
+  // Check that we're reusing the module files.
+  EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles);
+}
+
 } // namespace
 } // namespace clang::clangd
 

>From 4ed7bf5fadb8a3879fc9862537a24a4d9990a055 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 31 Oct 2024 13:35:59 +0800
Subject: [PATCH 2/4] Remove unique builder

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 133 --------------------
 1 file changed, 133 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index c4fb3d4010d370..e1c0cc743097f5 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -327,91 +327,13 @@ class ModulesBuilder::ModuleFileCache {
                             const ThreadsafeFS &TFS,
                             PrerequisiteModules &BuiltModuleFiles);
 
-  void startBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    BuildingModules.insert(ModuleName);
-  }
-  void endBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    BuildingModules.erase(ModuleName);
-  }
-  bool isBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    return BuildingModules.contains(ModuleName);
-  }
-
   const GlobalCompilationDatabase &CDB;
 
   llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
   // Mutex to guard accesses to ModuleFiles.
   std::mutex ModuleFilesMutex;
-
-  // We should only build a unique module at most at the same time.
-  // When we want to build a module, use the mutex to lock it and use the
-  // condition variable to notify other threads the status of the build results.
-  //
-  // Store the mutex and the condition_variable in shared_ptr since they may be
-  // accessed by many threads.
-  llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes;
-  llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs;
-  // The building modules set. A successed built module or a failed module or
-  // an unbuilt module shouldn't be in this set.
-  // This set is helpful to control the behavior of the condition variables.
-  llvm::StringSet<> BuildingModules;
-  // Lock when we access BuildingModules, BuildingModuleMutexes and
-  // BuildingModuleCVs.
-  std::mutex ModulesBuildingMutex;
-
-  /// An RAII object to guard the process to build a specific module.
-  struct ModuleBuildingSharedOwner {
-  public:
-    ModuleBuildingSharedOwner(StringRef ModuleName,
-                              std::shared_ptr<std::mutex> &Mutex,
-                              std::shared_ptr<std::condition_variable> &CV,
-                              ModuleFileCache &Cache)
-        : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) {
-      IsFirstTask = (Mutex.use_count() == 2);
-    }
-
-    ~ModuleBuildingSharedOwner();
-
-    bool isUniqueBuildingOwner() { return IsFirstTask; }
-
-    std::mutex &getMutex() { return *Mutex; }
-
-    std::condition_variable &getCV() { return *CV; }
-
-  private:
-    StringRef ModuleName;
-    std::shared_ptr<std::mutex> Mutex;
-    std::shared_ptr<std::condition_variable> CV;
-    ModuleFileCache &Cache;
-    bool IsFirstTask;
-  };
-
-  ModuleBuildingSharedOwner
-  getOrCreateModuleBuildingOwner(StringRef ModuleName);
 };
 
-ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner::
-    ~ModuleBuildingSharedOwner() {
-  std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex);
-
-  Mutex.reset();
-  CV.reset();
-
-  // Try to release the memory in builder if possible.
-  if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName);
-      Iter != Cache.BuildingModuleCVs.end() &&
-      Iter->getValue().use_count() == 1)
-    Cache.BuildingModuleCVs.erase(Iter);
-
-  if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName);
-      Iter != Cache.BuildingModuleMutexes.end() &&
-      Iter->getValue().use_count() == 1)
-    Cache.BuildingModuleMutexes.erase(Iter);
-}
-
 std::shared_ptr<ModuleFile>
 ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
     StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
@@ -452,28 +374,6 @@ std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
   return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
 }
 
-ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner
-ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner(
-    StringRef ModuleName) {
-  std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-
-  auto MutexIter = BuildingModuleMutexes.find(ModuleName);
-  if (MutexIter == BuildingModuleMutexes.end())
-    MutexIter = BuildingModuleMutexes
-                    .try_emplace(ModuleName, std::make_shared<std::mutex>())
-                    .first;
-
-  auto CVIter = BuildingModuleCVs.find(ModuleName);
-  if (CVIter == BuildingModuleCVs.end())
-    CVIter = BuildingModuleCVs
-                 .try_emplace(ModuleName,
-                              std::make_shared<std::condition_variable>())
-                 .first;
-
-  return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(),
-                                   CVIter->getValue(), *this);
-}
-
 llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
     ReusablePrerequisiteModules &BuiltModuleFiles) {
@@ -505,40 +405,7 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     return llvm::Error::success();
   }
 
-  ModuleBuildingSharedOwner ModuleBuildingOwner =
-      getOrCreateModuleBuildingOwner(ModuleName);
-
-  std::condition_variable &CV = ModuleBuildingOwner.getCV();
-  std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex());
-  if (!ModuleBuildingOwner.isUniqueBuildingOwner()) {
-    log("Waiting other task for module {0}", ModuleName);
-    CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); });
-
-    // Try to access the built module files from other threads manually.
-    // We don't call getValidModuleFile here since it may be too heavy.
-    std::lock_guard<std::mutex> _(ModuleFilesMutex);
-    auto Iter = ModuleFiles.find(ModuleName);
-    if (Iter != ModuleFiles.end()) {
-      log("Got module file from other task building {0}", ModuleName);
-      BuiltModuleFiles.addModuleFile(Iter->second);
-      return llvm::Error::success();
-    }
-
-    // If the module file is not in the cache, it indicates that the building
-    // from other thread failed, so we give up earlier in this case to avoid
-    // wasting time.
-    return llvm::createStringError(llvm::formatv(
-        "The module file {0} may be failed to build in other thread.",
-        ModuleName));
-  }
-
   log("Building module {0}", ModuleName);
-  startBuildingModule(ModuleName);
-
-  auto _ = llvm::make_scope_exit([&]() {
-    endBuildingModule(ModuleName);
-    CV.notify_all();
-  });
 
   llvm::SmallString<256> ModuleFilesPrefix =
       getUniqueModuleFilesPath(ModuleUnitFileName);

>From 9faa7447d46a322104f33739ab94c892039b33a1 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 30 Aug 2024 15:11:07 +0800
Subject: [PATCH 3/4] [clangd] [Modules] Avoid building the same module at the
 same time

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 133 ++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index e1c0cc743097f5..c4fb3d4010d370 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -327,13 +327,91 @@ class ModulesBuilder::ModuleFileCache {
                             const ThreadsafeFS &TFS,
                             PrerequisiteModules &BuiltModuleFiles);
 
+  void startBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.insert(ModuleName);
+  }
+  void endBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.erase(ModuleName);
+  }
+  bool isBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    return BuildingModules.contains(ModuleName);
+  }
+
   const GlobalCompilationDatabase &CDB;
 
   llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
   // Mutex to guard accesses to ModuleFiles.
   std::mutex ModuleFilesMutex;
+
+  // We should only build a unique module at most at the same time.
+  // When we want to build a module, use the mutex to lock it and use the
+  // condition variable to notify other threads the status of the build results.
+  //
+  // Store the mutex and the condition_variable in shared_ptr since they may be
+  // accessed by many threads.
+  llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes;
+  llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs;
+  // The building modules set. A successed built module or a failed module or
+  // an unbuilt module shouldn't be in this set.
+  // This set is helpful to control the behavior of the condition variables.
+  llvm::StringSet<> BuildingModules;
+  // Lock when we access BuildingModules, BuildingModuleMutexes and
+  // BuildingModuleCVs.
+  std::mutex ModulesBuildingMutex;
+
+  /// An RAII object to guard the process to build a specific module.
+  struct ModuleBuildingSharedOwner {
+  public:
+    ModuleBuildingSharedOwner(StringRef ModuleName,
+                              std::shared_ptr<std::mutex> &Mutex,
+                              std::shared_ptr<std::condition_variable> &CV,
+                              ModuleFileCache &Cache)
+        : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) {
+      IsFirstTask = (Mutex.use_count() == 2);
+    }
+
+    ~ModuleBuildingSharedOwner();
+
+    bool isUniqueBuildingOwner() { return IsFirstTask; }
+
+    std::mutex &getMutex() { return *Mutex; }
+
+    std::condition_variable &getCV() { return *CV; }
+
+  private:
+    StringRef ModuleName;
+    std::shared_ptr<std::mutex> Mutex;
+    std::shared_ptr<std::condition_variable> CV;
+    ModuleFileCache &Cache;
+    bool IsFirstTask;
+  };
+
+  ModuleBuildingSharedOwner
+  getOrCreateModuleBuildingOwner(StringRef ModuleName);
 };
 
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner::
+    ~ModuleBuildingSharedOwner() {
+  std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex);
+
+  Mutex.reset();
+  CV.reset();
+
+  // Try to release the memory in builder if possible.
+  if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName);
+      Iter != Cache.BuildingModuleCVs.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleCVs.erase(Iter);
+
+  if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName);
+      Iter != Cache.BuildingModuleMutexes.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleMutexes.erase(Iter);
+}
+
 std::shared_ptr<ModuleFile>
 ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
     StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
@@ -374,6 +452,28 @@ std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
   return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
 }
 
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner
+ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner(
+    StringRef ModuleName) {
+  std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+
+  auto MutexIter = BuildingModuleMutexes.find(ModuleName);
+  if (MutexIter == BuildingModuleMutexes.end())
+    MutexIter = BuildingModuleMutexes
+                    .try_emplace(ModuleName, std::make_shared<std::mutex>())
+                    .first;
+
+  auto CVIter = BuildingModuleCVs.find(ModuleName);
+  if (CVIter == BuildingModuleCVs.end())
+    CVIter = BuildingModuleCVs
+                 .try_emplace(ModuleName,
+                              std::make_shared<std::condition_variable>())
+                 .first;
+
+  return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(),
+                                   CVIter->getValue(), *this);
+}
+
 llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
     ReusablePrerequisiteModules &BuiltModuleFiles) {
@@ -405,7 +505,40 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     return llvm::Error::success();
   }
 
+  ModuleBuildingSharedOwner ModuleBuildingOwner =
+      getOrCreateModuleBuildingOwner(ModuleName);
+
+  std::condition_variable &CV = ModuleBuildingOwner.getCV();
+  std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex());
+  if (!ModuleBuildingOwner.isUniqueBuildingOwner()) {
+    log("Waiting other task for module {0}", ModuleName);
+    CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); });
+
+    // Try to access the built module files from other threads manually.
+    // We don't call getValidModuleFile here since it may be too heavy.
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+    auto Iter = ModuleFiles.find(ModuleName);
+    if (Iter != ModuleFiles.end()) {
+      log("Got module file from other task building {0}", ModuleName);
+      BuiltModuleFiles.addModuleFile(Iter->second);
+      return llvm::Error::success();
+    }
+
+    // If the module file is not in the cache, it indicates that the building
+    // from other thread failed, so we give up earlier in this case to avoid
+    // wasting time.
+    return llvm::createStringError(llvm::formatv(
+        "The module file {0} may be failed to build in other thread.",
+        ModuleName));
+  }
+
   log("Building module {0}", ModuleName);
+  startBuildingModule(ModuleName);
+
+  auto _ = llvm::make_scope_exit([&]() {
+    endBuildingModule(ModuleName);
+    CV.notify_all();
+  });
 
   llvm::SmallString<256> ModuleFilesPrefix =
       getUniqueModuleFilesPath(ModuleUnitFileName);

>From ed160e18b941bca16a87f0ea400aca2406967ae1 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 31 Oct 2024 13:55:03 +0800
Subject: [PATCH 4/4] add unittest

---
 .../unittests/PrerequisiteModulesTest.cpp     | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 9577935c48b5ec..a6c57acba8d40a 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -518,6 +518,52 @@ export int B = 44 + M;
   EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles);
 }
 
+TEST_F(PrerequisiteModulesTests, DeduplicateReusablePrerequisiteModulesTest) {
+  MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+export int M = 43;
+  )cpp");
+  CDB.addFile("A.cppm", R"cpp(
+export module A;
+import M;
+export int A = 43 + M;
+  )cpp");
+  CDB.addFile("B.cppm", R"cpp(
+export module B;
+import M;
+export int B = 44 + M;
+  )cpp");
+
+  ModulesBuilder Builder(CDB);
+
+  std::unique_ptr<PrerequisiteModules> AInfo, BInfo;
+
+  // Trying to build A and B at the same time and check we will only build module M once.
+  AsyncTaskRunner ThreadPool;
+  ThreadPool.runAsync("A Job", [&] () {
+    AInfo = Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+  });
+  ThreadPool.runAsync("B Job", [&] () {
+    BInfo = Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+  });
+  ThreadPool.wait();
+
+  EXPECT_TRUE(AInfo);
+  EXPECT_TRUE(BInfo);
+  HeaderSearchOptions HSOptsA(TestDir);
+  HeaderSearchOptions HSOptsB(TestDir);
+  AInfo->adjustHeaderSearchOptions(HSOptsA);
+  BInfo->adjustHeaderSearchOptions(HSOptsB);
+
+  EXPECT_FALSE(HSOptsA.PrebuiltModuleFiles.empty());
+  EXPECT_FALSE(HSOptsB.PrebuiltModuleFiles.empty());
+
+  // Check that we're reusing the module files.
+  EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles);
+}
+
 } // namespace
 } // namespace clang::clangd
 



More information about the cfe-commits mailing list