[clang-tools-extra] [clangd] [Modules] Support Reusable Modules Builder (PR #106683)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 12 01:30:26 PST 2024


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/106683

>From 15aa6af1f5d22e4b837e8e2fd49469310ffbe7f1 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 30 Aug 2024 15:11:07 +0800
Subject: [PATCH 1/7] [clangd] [Modules] Support Reusable Modules Builder

---
 clang-tools-extra/clangd/ModulesBuilder.cpp   | 375 ++++++++++++++----
 clang-tools-extra/clangd/ModulesBuilder.h     |   8 +-
 .../unittests/PrerequisiteModulesTest.cpp     |  36 ++
 3 files changed, 344 insertions(+), 75 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 97f67ddf5495a2..c4fb3d4010d370 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -13,6 +13,7 @@
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
+#include "llvm/ADT/ScopeExit.h"
 
 namespace clang {
 namespace clangd {
@@ -192,33 +193,27 @@ bool IsModuleFilesUpToDate(
       });
 }
 
-// StandalonePrerequisiteModules - stands for PrerequisiteModules for which all
+// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
 // the required modules are built successfully. All the module files
-// are owned by the StandalonePrerequisiteModules class.
-//
-// Any of the built module files won't be shared with other instances of the
-// class. So that we can avoid worrying thread safety.
-//
-// We don't need to worry about duplicated module names here since the standard
-// guarantees the module names should be unique to a program.
-class StandalonePrerequisiteModules : public PrerequisiteModules {
+// are owned by the modules builder.
+class ReusablePrerequisiteModules : public PrerequisiteModules {
 public:
-  StandalonePrerequisiteModules() = default;
+  ReusablePrerequisiteModules() = default;
 
-  StandalonePrerequisiteModules(const StandalonePrerequisiteModules &) = delete;
-  StandalonePrerequisiteModules
-  operator=(const StandalonePrerequisiteModules &) = delete;
-  StandalonePrerequisiteModules(StandalonePrerequisiteModules &&) = delete;
-  StandalonePrerequisiteModules
-  operator=(StandalonePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &) = delete;
+  ReusablePrerequisiteModules
+  operator=(const ReusablePrerequisiteModules &) = delete;
+  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules
+  operator=(ReusablePrerequisiteModules &&) = delete;
 
-  ~StandalonePrerequisiteModules() override = default;
+  ~ReusablePrerequisiteModules() override = default;
 
   void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
     // Appending all built module files.
     for (auto &RequiredModule : RequiredModules)
       Options.PrebuiltModuleFiles.insert_or_assign(
-          RequiredModule.ModuleName, RequiredModule.ModuleFilePath);
+          RequiredModule->ModuleName, RequiredModule->ModuleFilePath);
   }
 
   bool canReuse(const CompilerInvocation &CI,
@@ -228,54 +223,30 @@ class StandalonePrerequisiteModules : public PrerequisiteModules {
     return BuiltModuleNames.contains(ModuleName);
   }
 
-  void addModuleFile(llvm::StringRef ModuleName,
-                     llvm::StringRef ModuleFilePath) {
-    RequiredModules.emplace_back(ModuleName, ModuleFilePath);
-    BuiltModuleNames.insert(ModuleName);
+  void addModuleFile(std::shared_ptr<ModuleFile> BMI) {
+    BuiltModuleNames.insert(BMI->ModuleName);
+    RequiredModules.emplace_back(std::move(BMI));
   }
 
 private:
-  llvm::SmallVector<ModuleFile, 8> RequiredModules;
+  mutable llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules;
   // A helper class to speedup the query if a module is built.
   llvm::StringSet<> BuiltModuleNames;
 };
 
-// Build a module file for module with `ModuleName`. The information of built
-// module file are stored in \param BuiltModuleFiles.
-llvm::Error buildModuleFile(llvm::StringRef ModuleName,
-                            const GlobalCompilationDatabase &CDB,
-                            const ThreadsafeFS &TFS, ProjectModules &MDB,
-                            PathRef ModuleFilesPrefix,
-                            StandalonePrerequisiteModules &BuiltModuleFiles) {
-  if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
-    return llvm::Error::success();
-
-  PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
-  // It is possible that we're meeting third party modules (modules whose
-  // source are not in the project. e.g, the std module may be a third-party
-  // module for most projects) or something wrong with the implementation of
-  // ProjectModules.
-  // FIXME: How should we treat third party modules here? If we want to ignore
-  // third party modules, we should return true instead of false here.
-  // Currently we simply bail out.
-  if (ModuleUnitFileName.empty())
-    return llvm::createStringError("Failed to get the primary source");
-
+/// Build a module file for module with `ModuleName`. The information of built
+/// module file are stored in \param BuiltModuleFiles.
+llvm::Expected<ModuleFile>
+buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
+                const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
+                PathRef ModuleFilesPrefix,
+                const ReusablePrerequisiteModules &BuiltModuleFiles) {
   // Try cheap operation earlier to boil-out cheaply if there are problems.
   auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
   if (!Cmd)
     return llvm::createStringError(
         llvm::formatv("No compile command for {0}", ModuleUnitFileName));
 
-  for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName)) {
-    // Return early if there are errors building the module file.
-    if (llvm::Error Err = buildModuleFile(RequiredModuleName, CDB, TFS, MDB,
-                                          ModuleFilesPrefix, BuiltModuleFiles))
-      return llvm::createStringError(
-          llvm::formatv("Failed to build dependency {0}: {1}",
-                        RequiredModuleName, llvm::toString(std::move(Err))));
-  }
-
   Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
 
   ParseInputs Inputs;
@@ -316,15 +287,282 @@ llvm::Error buildModuleFile(llvm::StringRef ModuleName,
   if (Clang->getDiagnostics().hasErrorOccurred())
     return llvm::createStringError("Compilation failed");
 
-  BuiltModuleFiles.addModuleFile(ModuleName, Inputs.CompileCommand.Output);
-  return llvm::Error::success();
+  return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
+}
+
+bool ReusablePrerequisiteModules::canReuse(
+    const CompilerInvocation &CI,
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
+  if (RequiredModules.empty())
+    return true;
+
+  SmallVector<StringRef> BMIPaths;
+  for (auto &MF : RequiredModules)
+    BMIPaths.push_back(MF->ModuleFilePath);
+  return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
 }
 } // namespace
 
+class ModulesBuilder::ModuleFileCache {
+public:
+  ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
+
+  llvm::Error
+  getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
+                       ProjectModules &MDB,
+                       ReusablePrerequisiteModules &RequiredModules);
+  const GlobalCompilationDatabase &getCDB() const { return CDB; }
+
+private:
+  std::shared_ptr<ModuleFile>
+  getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
+                     const ThreadsafeFS &TFS,
+                     PrerequisiteModules &BuiltModuleFiles);
+
+  /// This should only be called by getValidModuleFile. This is unlocked version
+  /// of getValidModuleFile. The function is extracted to avoid dead locks when
+  /// recursing.
+  std::shared_ptr<ModuleFile>
+  isValidModuleFileUnlocked(StringRef ModuleName, ProjectModules &MDB,
+                            const ThreadsafeFS &TFS,
+                            PrerequisiteModules &BuiltModuleFiles);
+
+  void startBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.insert(ModuleName);
+  }
+  void endBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    BuildingModules.erase(ModuleName);
+  }
+  bool isBuildingModule(StringRef ModuleName) {
+    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+    return BuildingModules.contains(ModuleName);
+  }
+
+  const GlobalCompilationDatabase &CDB;
+
+  llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
+  // Mutex to guard accesses to ModuleFiles.
+  std::mutex ModuleFilesMutex;
+
+  // We should only build a unique module at most at the same time.
+  // When we want to build a module, use the mutex to lock it and use the
+  // condition variable to notify other threads the status of the build results.
+  //
+  // Store the mutex and the condition_variable in shared_ptr since they may be
+  // accessed by many threads.
+  llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes;
+  llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs;
+  // The building modules set. A successed built module or a failed module or
+  // an unbuilt module shouldn't be in this set.
+  // This set is helpful to control the behavior of the condition variables.
+  llvm::StringSet<> BuildingModules;
+  // Lock when we access BuildingModules, BuildingModuleMutexes and
+  // BuildingModuleCVs.
+  std::mutex ModulesBuildingMutex;
+
+  /// An RAII object to guard the process to build a specific module.
+  struct ModuleBuildingSharedOwner {
+  public:
+    ModuleBuildingSharedOwner(StringRef ModuleName,
+                              std::shared_ptr<std::mutex> &Mutex,
+                              std::shared_ptr<std::condition_variable> &CV,
+                              ModuleFileCache &Cache)
+        : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) {
+      IsFirstTask = (Mutex.use_count() == 2);
+    }
+
+    ~ModuleBuildingSharedOwner();
+
+    bool isUniqueBuildingOwner() { return IsFirstTask; }
+
+    std::mutex &getMutex() { return *Mutex; }
+
+    std::condition_variable &getCV() { return *CV; }
+
+  private:
+    StringRef ModuleName;
+    std::shared_ptr<std::mutex> Mutex;
+    std::shared_ptr<std::condition_variable> CV;
+    ModuleFileCache &Cache;
+    bool IsFirstTask;
+  };
+
+  ModuleBuildingSharedOwner
+  getOrCreateModuleBuildingOwner(StringRef ModuleName);
+};
+
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner::
+    ~ModuleBuildingSharedOwner() {
+  std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex);
+
+  Mutex.reset();
+  CV.reset();
+
+  // Try to release the memory in builder if possible.
+  if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName);
+      Iter != Cache.BuildingModuleCVs.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleCVs.erase(Iter);
+
+  if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName);
+      Iter != Cache.BuildingModuleMutexes.end() &&
+      Iter->getValue().use_count() == 1)
+    Cache.BuildingModuleMutexes.erase(Iter);
+}
+
+std::shared_ptr<ModuleFile>
+ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
+    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
+    PrerequisiteModules &BuiltModuleFiles) {
+  auto Iter = ModuleFiles.find(ModuleName);
+  if (Iter != ModuleFiles.end()) {
+    if (!IsModuleFileUpToDate(Iter->second->ModuleFilePath, BuiltModuleFiles,
+                              TFS.view(std::nullopt))) {
+      log("Found not-up-date module file {0} for module {1} in cache",
+          Iter->second->ModuleFilePath, ModuleName);
+      ModuleFiles.erase(Iter);
+      return nullptr;
+    }
+
+    if (llvm::any_of(
+            MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)),
+            [&MDB, &TFS, &BuiltModuleFiles, this](auto &&RequiredModuleName) {
+              return !isValidModuleFileUnlocked(RequiredModuleName, MDB, TFS,
+                                                BuiltModuleFiles);
+            })) {
+      ModuleFiles.erase(Iter);
+      return nullptr;
+    }
+
+    return Iter->second;
+  }
+
+  log("Don't find {0} in cache", ModuleName);
+
+  return nullptr;
+}
+
+std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
+    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
+    PrerequisiteModules &BuiltModuleFiles) {
+  std::lock_guard<std::mutex> _(ModuleFilesMutex);
+
+  return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
+}
+
+ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner
+ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner(
+    StringRef ModuleName) {
+  std::lock_guard<std::mutex> _(ModulesBuildingMutex);
+
+  auto MutexIter = BuildingModuleMutexes.find(ModuleName);
+  if (MutexIter == BuildingModuleMutexes.end())
+    MutexIter = BuildingModuleMutexes
+                    .try_emplace(ModuleName, std::make_shared<std::mutex>())
+                    .first;
+
+  auto CVIter = BuildingModuleCVs.find(ModuleName);
+  if (CVIter == BuildingModuleCVs.end())
+    CVIter = BuildingModuleCVs
+                 .try_emplace(ModuleName,
+                              std::make_shared<std::condition_variable>())
+                 .first;
+
+  return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(),
+                                   CVIter->getValue(), *this);
+}
+
+llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
+    StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
+    ReusablePrerequisiteModules &BuiltModuleFiles) {
+  if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
+    return llvm::Error::success();
+
+  PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
+  /// It is possible that we're meeting third party modules (modules whose
+  /// source are not in the project. e.g, the std module may be a third-party
+  /// module for most project) or something wrong with the implementation of
+  /// ProjectModules.
+  /// FIXME: How should we treat third party modules here? If we want to ignore
+  /// third party modules, we should return true instead of false here.
+  /// Currently we simply bail out.
+  if (ModuleUnitFileName.empty())
+    return llvm::createStringError(
+        llvm::formatv("Don't get the module unit for module {0}", ModuleName));
+
+  for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName))
+    // Return early if there are errors building the module file.
+    if (!getOrBuildModuleFile(RequiredModuleName, TFS, MDB, BuiltModuleFiles))
+      return llvm::createStringError(
+          llvm::formatv("Failed to build module {0}", RequiredModuleName));
+
+  if (std::shared_ptr<ModuleFile> Cached =
+          getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
+    log("Reusing module {0} from {1}", ModuleName, Cached->ModuleFilePath);
+    BuiltModuleFiles.addModuleFile(Cached);
+    return llvm::Error::success();
+  }
+
+  ModuleBuildingSharedOwner ModuleBuildingOwner =
+      getOrCreateModuleBuildingOwner(ModuleName);
+
+  std::condition_variable &CV = ModuleBuildingOwner.getCV();
+  std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex());
+  if (!ModuleBuildingOwner.isUniqueBuildingOwner()) {
+    log("Waiting other task for module {0}", ModuleName);
+    CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); });
+
+    // Try to access the built module files from other threads manually.
+    // We don't call getValidModuleFile here since it may be too heavy.
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+    auto Iter = ModuleFiles.find(ModuleName);
+    if (Iter != ModuleFiles.end()) {
+      log("Got module file from other task building {0}", ModuleName);
+      BuiltModuleFiles.addModuleFile(Iter->second);
+      return llvm::Error::success();
+    }
+
+    // If the module file is not in the cache, it indicates that the building
+    // from other thread failed, so we give up earlier in this case to avoid
+    // wasting time.
+    return llvm::createStringError(llvm::formatv(
+        "The module file {0} may be failed to build in other thread.",
+        ModuleName));
+  }
+
+  log("Building module {0}", ModuleName);
+  startBuildingModule(ModuleName);
+
+  auto _ = llvm::make_scope_exit([&]() {
+    endBuildingModule(ModuleName);
+    CV.notify_all();
+  });
+
+  llvm::SmallString<256> ModuleFilesPrefix =
+      getUniqueModuleFilesPath(ModuleUnitFileName);
+
+  llvm::Expected<ModuleFile> MF =
+      buildModuleFile(ModuleName, ModuleUnitFileName, CDB, TFS,
+                      ModuleFilesPrefix, BuiltModuleFiles);
+  if (llvm::Error Err = MF.takeError())
+    return Err;
+
+  log("Built module {0} to {1}", ModuleName, MF->ModuleFilePath);
+
+  std::lock_guard<std::mutex> __(ModuleFilesMutex);
+  auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF));
+  ModuleFiles.insert_or_assign(ModuleName, BuiltModuleFile);
+  BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
+
+  return llvm::Error::success();
+}
 std::unique_ptr<PrerequisiteModules>
 ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
-                                            const ThreadsafeFS &TFS) const {
-  std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File);
+                                            const ThreadsafeFS &TFS) {
+  std::unique_ptr<ProjectModules> MDB =
+      MFCache->getCDB().getProjectModules(File);
   if (!MDB) {
     elog("Failed to get Project Modules information for {0}", File);
     return std::make_unique<FailedPrerequisiteModules>();
@@ -332,20 +570,19 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
 
   std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
   if (RequiredModuleNames.empty())
-    return std::make_unique<StandalonePrerequisiteModules>();
+    return std::make_unique<ReusablePrerequisiteModules>();
 
   llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(File);
 
   log("Trying to build required modules for {0} in {1}", File,
       ModuleFilesPrefix);
 
-  auto RequiredModules = std::make_unique<StandalonePrerequisiteModules>();
+  auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
 
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
-    if (llvm::Error Err =
-            buildModuleFile(RequiredModuleName, CDB, TFS, *MDB.get(),
-                            ModuleFilesPrefix, *RequiredModules.get())) {
+    if (llvm::Error Err = MFCache->getOrBuildModuleFile(
+            RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
       elog("Failed to build module {0}; due to {1}", RequiredModuleName,
            toString(std::move(Err)));
       return std::make_unique<FailedPrerequisiteModules>();
@@ -357,17 +594,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
   return std::move(RequiredModules);
 }
 
-bool StandalonePrerequisiteModules::canReuse(
-    const CompilerInvocation &CI,
-    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
-  if (RequiredModules.empty())
-    return true;
-
-  SmallVector<StringRef> BMIPaths;
-  for (auto &MF : RequiredModules)
-    BMIPaths.push_back(MF.ModuleFilePath);
-  return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
+ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
+  MFCache = std::make_unique<ModuleFileCache>(CDB);
 }
 
+ModulesBuilder::~ModulesBuilder() {}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/ModulesBuilder.h b/clang-tools-extra/clangd/ModulesBuilder.h
index 0514e7486475d0..2aae441747a898 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.h
+++ b/clang-tools-extra/clangd/ModulesBuilder.h
@@ -85,7 +85,8 @@ class PrerequisiteModules {
 /// different versions and different source files.
 class ModulesBuilder {
 public:
-  ModulesBuilder(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
+  ModulesBuilder(const GlobalCompilationDatabase &CDB);
+  ~ModulesBuilder();
 
   ModulesBuilder(const ModulesBuilder &) = delete;
   ModulesBuilder(ModulesBuilder &&) = delete;
@@ -94,10 +95,11 @@ class ModulesBuilder {
   ModulesBuilder &operator=(ModulesBuilder &&) = delete;
 
   std::unique_ptr<PrerequisiteModules>
-  buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) const;
+  buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS);
 
 private:
-  const GlobalCompilationDatabase &CDB;
+  class ModuleFileCache;
+  std::unique_ptr<ModuleFileCache> MFCache;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 691a93e7acd0af..9577935c48b5ec 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -482,6 +482,42 @@ void func() {
   EXPECT_EQ(Result.signatures[0].parameters[0].labelString, "int a");
 }
 
+TEST_F(PrerequisiteModulesTests, ReusablePrerequisiteModulesTest) {
+  MockDirectoryCompilationDatabase CDB(TestDir, FS);
+
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+export int M = 43;
+  )cpp");
+  CDB.addFile("A.cppm", R"cpp(
+export module A;
+import M;
+export int A = 43 + M;
+  )cpp");
+  CDB.addFile("B.cppm", R"cpp(
+export module B;
+import M;
+export int B = 44 + M;
+  )cpp");
+
+  ModulesBuilder Builder(CDB);
+
+  auto AInfo = Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+  EXPECT_TRUE(AInfo);
+  auto BInfo = Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+  EXPECT_TRUE(BInfo);
+  HeaderSearchOptions HSOptsA(TestDir);
+  HeaderSearchOptions HSOptsB(TestDir);
+  AInfo->adjustHeaderSearchOptions(HSOptsA);
+  BInfo->adjustHeaderSearchOptions(HSOptsB);
+
+  EXPECT_FALSE(HSOptsA.PrebuiltModuleFiles.empty());
+  EXPECT_FALSE(HSOptsB.PrebuiltModuleFiles.empty());
+
+  // Check that we're reusing the module files.
+  EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles);
+}
+
 } // namespace
 } // namespace clang::clangd
 

>From 4ed7bf5fadb8a3879fc9862537a24a4d9990a055 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Thu, 31 Oct 2024 13:35:59 +0800
Subject: [PATCH 2/7] Remove unique builder

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 133 --------------------
 1 file changed, 133 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index c4fb3d4010d370..e1c0cc743097f5 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -327,91 +327,13 @@ class ModulesBuilder::ModuleFileCache {
                             const ThreadsafeFS &TFS,
                             PrerequisiteModules &BuiltModuleFiles);
 
-  void startBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    BuildingModules.insert(ModuleName);
-  }
-  void endBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    BuildingModules.erase(ModuleName);
-  }
-  bool isBuildingModule(StringRef ModuleName) {
-    std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-    return BuildingModules.contains(ModuleName);
-  }
-
   const GlobalCompilationDatabase &CDB;
 
   llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
   // Mutex to guard accesses to ModuleFiles.
   std::mutex ModuleFilesMutex;
-
-  // We should only build a unique module at most at the same time.
-  // When we want to build a module, use the mutex to lock it and use the
-  // condition variable to notify other threads the status of the build results.
-  //
-  // Store the mutex and the condition_variable in shared_ptr since they may be
-  // accessed by many threads.
-  llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes;
-  llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs;
-  // The building modules set. A successed built module or a failed module or
-  // an unbuilt module shouldn't be in this set.
-  // This set is helpful to control the behavior of the condition variables.
-  llvm::StringSet<> BuildingModules;
-  // Lock when we access BuildingModules, BuildingModuleMutexes and
-  // BuildingModuleCVs.
-  std::mutex ModulesBuildingMutex;
-
-  /// An RAII object to guard the process to build a specific module.
-  struct ModuleBuildingSharedOwner {
-  public:
-    ModuleBuildingSharedOwner(StringRef ModuleName,
-                              std::shared_ptr<std::mutex> &Mutex,
-                              std::shared_ptr<std::condition_variable> &CV,
-                              ModuleFileCache &Cache)
-        : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) {
-      IsFirstTask = (Mutex.use_count() == 2);
-    }
-
-    ~ModuleBuildingSharedOwner();
-
-    bool isUniqueBuildingOwner() { return IsFirstTask; }
-
-    std::mutex &getMutex() { return *Mutex; }
-
-    std::condition_variable &getCV() { return *CV; }
-
-  private:
-    StringRef ModuleName;
-    std::shared_ptr<std::mutex> Mutex;
-    std::shared_ptr<std::condition_variable> CV;
-    ModuleFileCache &Cache;
-    bool IsFirstTask;
-  };
-
-  ModuleBuildingSharedOwner
-  getOrCreateModuleBuildingOwner(StringRef ModuleName);
 };
 
-ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner::
-    ~ModuleBuildingSharedOwner() {
-  std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex);
-
-  Mutex.reset();
-  CV.reset();
-
-  // Try to release the memory in builder if possible.
-  if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName);
-      Iter != Cache.BuildingModuleCVs.end() &&
-      Iter->getValue().use_count() == 1)
-    Cache.BuildingModuleCVs.erase(Iter);
-
-  if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName);
-      Iter != Cache.BuildingModuleMutexes.end() &&
-      Iter->getValue().use_count() == 1)
-    Cache.BuildingModuleMutexes.erase(Iter);
-}
-
 std::shared_ptr<ModuleFile>
 ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
     StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
@@ -452,28 +374,6 @@ std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
   return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
 }
 
-ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner
-ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner(
-    StringRef ModuleName) {
-  std::lock_guard<std::mutex> _(ModulesBuildingMutex);
-
-  auto MutexIter = BuildingModuleMutexes.find(ModuleName);
-  if (MutexIter == BuildingModuleMutexes.end())
-    MutexIter = BuildingModuleMutexes
-                    .try_emplace(ModuleName, std::make_shared<std::mutex>())
-                    .first;
-
-  auto CVIter = BuildingModuleCVs.find(ModuleName);
-  if (CVIter == BuildingModuleCVs.end())
-    CVIter = BuildingModuleCVs
-                 .try_emplace(ModuleName,
-                              std::make_shared<std::condition_variable>())
-                 .first;
-
-  return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(),
-                                   CVIter->getValue(), *this);
-}
-
 llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
     ReusablePrerequisiteModules &BuiltModuleFiles) {
@@ -505,40 +405,7 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
     return llvm::Error::success();
   }
 
-  ModuleBuildingSharedOwner ModuleBuildingOwner =
-      getOrCreateModuleBuildingOwner(ModuleName);
-
-  std::condition_variable &CV = ModuleBuildingOwner.getCV();
-  std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex());
-  if (!ModuleBuildingOwner.isUniqueBuildingOwner()) {
-    log("Waiting other task for module {0}", ModuleName);
-    CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); });
-
-    // Try to access the built module files from other threads manually.
-    // We don't call getValidModuleFile here since it may be too heavy.
-    std::lock_guard<std::mutex> _(ModuleFilesMutex);
-    auto Iter = ModuleFiles.find(ModuleName);
-    if (Iter != ModuleFiles.end()) {
-      log("Got module file from other task building {0}", ModuleName);
-      BuiltModuleFiles.addModuleFile(Iter->second);
-      return llvm::Error::success();
-    }
-
-    // If the module file is not in the cache, it indicates that the building
-    // from other thread failed, so we give up earlier in this case to avoid
-    // wasting time.
-    return llvm::createStringError(llvm::formatv(
-        "The module file {0} may be failed to build in other thread.",
-        ModuleName));
-  }
-
   log("Building module {0}", ModuleName);
-  startBuildingModule(ModuleName);
-
-  auto _ = llvm::make_scope_exit([&]() {
-    endBuildingModule(ModuleName);
-    CV.notify_all();
-  });
 
   llvm::SmallString<256> ModuleFilesPrefix =
       getUniqueModuleFilesPath(ModuleUnitFileName);

>From dcdd18acd9b5d30e5607f0dffc18843e06adbfd6 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Mon, 4 Nov 2024 14:26:12 +0800
Subject: [PATCH 3/7] Update

---
 clang-tools-extra/clangd/ModulesBuilder.cpp   | 151 +++++++++++-------
 clang-tools-extra/clangd/ModulesBuilder.h     |   4 +-
 .../unittests/PrerequisiteModulesTest.cpp     |  36 +++++
 3 files changed, 135 insertions(+), 56 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index e1c0cc743097f5..73186f3b5ce7fc 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -14,6 +14,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
 #include "llvm/ADT/ScopeExit.h"
+#include <queue>
 
 namespace clang {
 namespace clangd {
@@ -125,6 +126,11 @@ struct ModuleFile {
       llvm::sys::fs::remove(ModuleFilePath);
   }
 
+  StringRef getModuleName() const { return ModuleName; }
+
+  StringRef getModuleFilePath() const { return ModuleFilePath; }
+
+private:
   std::string ModuleName;
   std::string ModuleFilePath;
 };
@@ -213,7 +219,8 @@ class ReusablePrerequisiteModules : public PrerequisiteModules {
     // Appending all built module files.
     for (auto &RequiredModule : RequiredModules)
       Options.PrebuiltModuleFiles.insert_or_assign(
-          RequiredModule->ModuleName, RequiredModule->ModuleFilePath);
+          RequiredModule->getModuleName().str(),
+          RequiredModule->getModuleFilePath().str());
   }
 
   bool canReuse(const CompilerInvocation &CI,
@@ -224,12 +231,12 @@ class ReusablePrerequisiteModules : public PrerequisiteModules {
   }
 
   void addModuleFile(std::shared_ptr<ModuleFile> BMI) {
-    BuiltModuleNames.insert(BMI->ModuleName);
+    BuiltModuleNames.insert(BMI->getModuleName());
     RequiredModules.emplace_back(std::move(BMI));
   }
 
 private:
-  mutable llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules;
+  llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules;
   // A helper class to speedup the query if a module is built.
   llvm::StringSet<> BuiltModuleNames;
 };
@@ -298,12 +305,11 @@ bool ReusablePrerequisiteModules::canReuse(
 
   SmallVector<StringRef> BMIPaths;
   for (auto &MF : RequiredModules)
-    BMIPaths.push_back(MF->ModuleFilePath);
+    BMIPaths.push_back(MF->getModuleFilePath());
   return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
 }
-} // namespace
 
-class ModulesBuilder::ModuleFileCache {
+class ModuleFileCache {
 public:
   ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
 
@@ -313,20 +319,18 @@ class ModulesBuilder::ModuleFileCache {
                        ReusablePrerequisiteModules &RequiredModules);
   const GlobalCompilationDatabase &getCDB() const { return CDB; }
 
-private:
   std::shared_ptr<ModuleFile>
   getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
                      const ThreadsafeFS &TFS,
                      PrerequisiteModules &BuiltModuleFiles);
 
-  /// This should only be called by getValidModuleFile. This is unlocked version
-  /// of getValidModuleFile. The function is extracted to avoid dead locks when
-  /// recursing.
-  std::shared_ptr<ModuleFile>
-  isValidModuleFileUnlocked(StringRef ModuleName, ProjectModules &MDB,
-                            const ThreadsafeFS &TFS,
-                            PrerequisiteModules &BuiltModuleFiles);
+  void add(StringRef ModuleName, std::shared_ptr<ModuleFile> ModuleFile) {
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+
+    ModuleFiles.insert_or_assign(ModuleName, ModuleFile);
+  }
 
+private:
   const GlobalCompilationDatabase &CDB;
 
   llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
@@ -334,47 +338,88 @@ class ModulesBuilder::ModuleFileCache {
   std::mutex ModuleFilesMutex;
 };
 
+/// Collect the directly and indirectly required module names for \param
+/// ModuleName. The \param ModuleName is guaranteed to be the first element in
+/// \param ModuleNames.
+void getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName,
+                           llvm::SmallVector<StringRef> &ModuleNames) {
+  std::queue<StringRef> Worklist;
+  llvm::StringSet<> ModuleNamesSet;
+  Worklist.push(ModuleName);
+
+  while (!Worklist.empty()) {
+    StringRef CurrentModule = Worklist.front();
+    Worklist.pop();
+
+    if (!ModuleNamesSet.insert(CurrentModule).second)
+      continue;
+
+    ModuleNames.push_back(CurrentModule);
+
+    for (StringRef RequiredModuleName :
+         MDB.getRequiredModules(MDB.getSourceForModuleName(CurrentModule)))
+      if (!ModuleNamesSet.contains(RequiredModuleName))
+        Worklist.push(RequiredModuleName);
+  }
+}
+
 std::shared_ptr<ModuleFile>
-ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked(
-    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
-    PrerequisiteModules &BuiltModuleFiles) {
-  auto Iter = ModuleFiles.find(ModuleName);
-  if (Iter != ModuleFiles.end()) {
-    if (!IsModuleFileUpToDate(Iter->second->ModuleFilePath, BuiltModuleFiles,
-                              TFS.view(std::nullopt))) {
-      log("Found not-up-date module file {0} for module {1} in cache",
-          Iter->second->ModuleFilePath, ModuleName);
-      ModuleFiles.erase(Iter);
-      return nullptr;
-    }
+ModuleFileCache::getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
+                                    const ThreadsafeFS &TFS,
+                                    PrerequisiteModules &BuiltModuleFiles) {
+  {
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
 
-    if (llvm::any_of(
-            MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)),
-            [&MDB, &TFS, &BuiltModuleFiles, this](auto &&RequiredModuleName) {
-              return !isValidModuleFileUnlocked(RequiredModuleName, MDB, TFS,
-                                                BuiltModuleFiles);
-            })) {
-      ModuleFiles.erase(Iter);
+    if (ModuleFiles.find(ModuleName) == ModuleFiles.end())
       return nullptr;
-    }
+  }
+
+  llvm::SmallVector<StringRef> ModuleNames;
+  getAllRequiredModules(MDB, ModuleName, ModuleNames);
+
+  llvm::SmallVector<std::shared_ptr<ModuleFile>> RequiredModuleFiles;
+
+  {
+    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+
+    for (StringRef ModuleName : ModuleNames) {
+      auto Iter = ModuleFiles.find(ModuleName);
+      if (Iter == ModuleFiles.end())
+        return nullptr;
 
-    return Iter->second;
+      RequiredModuleFiles.push_back(Iter->second);
+    }
   }
 
-  log("Don't find {0} in cache", ModuleName);
+  if (RequiredModuleFiles.empty())
+    return nullptr;
+
+  if (llvm::all_of(RequiredModuleFiles, [&](auto MF) {
+        return IsModuleFileUpToDate(MF->getModuleFilePath(), BuiltModuleFiles,
+                                    TFS.view(std::nullopt));
+      }))
+    return RequiredModuleFiles[0];
 
   return nullptr;
 }
+} // namespace
 
-std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile(
-    StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS,
-    PrerequisiteModules &BuiltModuleFiles) {
-  std::lock_guard<std::mutex> _(ModuleFilesMutex);
+class ModulesBuilder::ModulesBuilderImpl {
+public:
+  ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
 
-  return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles);
-}
+  const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
 
-llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
+  llvm::Error
+  getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
+                       ProjectModules &MDB,
+                       ReusablePrerequisiteModules &BuiltModuleFiles);
+
+private:
+  ModuleFileCache Cache;
+};
+
+llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
     StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
     ReusablePrerequisiteModules &BuiltModuleFiles) {
   if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
@@ -399,8 +444,8 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
           llvm::formatv("Failed to build module {0}", RequiredModuleName));
 
   if (std::shared_ptr<ModuleFile> Cached =
-          getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
-    log("Reusing module {0} from {1}", ModuleName, Cached->ModuleFilePath);
+          Cache.getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
+    log("Reusing module {0} from {1}", ModuleName, Cached->getModuleFilePath());
     BuiltModuleFiles.addModuleFile(Cached);
     return llvm::Error::success();
   }
@@ -411,25 +456,23 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile(
       getUniqueModuleFilesPath(ModuleUnitFileName);
 
   llvm::Expected<ModuleFile> MF =
-      buildModuleFile(ModuleName, ModuleUnitFileName, CDB, TFS,
+      buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS,
                       ModuleFilesPrefix, BuiltModuleFiles);
   if (llvm::Error Err = MF.takeError())
     return Err;
 
-  log("Built module {0} to {1}", ModuleName, MF->ModuleFilePath);
-
-  std::lock_guard<std::mutex> __(ModuleFilesMutex);
+  log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
   auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF));
-  ModuleFiles.insert_or_assign(ModuleName, BuiltModuleFile);
+  Cache.add(ModuleName, BuiltModuleFile);
   BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
 
   return llvm::Error::success();
 }
+
 std::unique_ptr<PrerequisiteModules>
 ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
                                             const ThreadsafeFS &TFS) {
-  std::unique_ptr<ProjectModules> MDB =
-      MFCache->getCDB().getProjectModules(File);
+  std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
   if (!MDB) {
     elog("Failed to get Project Modules information for {0}", File);
     return std::make_unique<FailedPrerequisiteModules>();
@@ -448,7 +491,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
 
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
-    if (llvm::Error Err = MFCache->getOrBuildModuleFile(
+    if (llvm::Error Err = Impl->getOrBuildModuleFile(
             RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
       elog("Failed to build module {0}; due to {1}", RequiredModuleName,
            toString(std::move(Err)));
@@ -462,7 +505,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
 }
 
 ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
-  MFCache = std::make_unique<ModuleFileCache>(CDB);
+  Impl = std::make_unique<ModulesBuilderImpl>(CDB);
 }
 
 ModulesBuilder::~ModulesBuilder() {}
diff --git a/clang-tools-extra/clangd/ModulesBuilder.h b/clang-tools-extra/clangd/ModulesBuilder.h
index 2aae441747a898..f40a9006e91690 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.h
+++ b/clang-tools-extra/clangd/ModulesBuilder.h
@@ -98,8 +98,8 @@ class ModulesBuilder {
   buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS);
 
 private:
-  class ModuleFileCache;
-  std::unique_ptr<ModuleFileCache> MFCache;
+  class ModulesBuilderImpl;
+  std::unique_ptr<ModulesBuilderImpl> Impl;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
index 9577935c48b5ec..1bb8e19cce23e0 100644
--- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
+++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp
@@ -516,6 +516,42 @@ export int B = 44 + M;
 
   // Check that we're reusing the module files.
   EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles);
+
+  // Update M.cppm to check if the modules builder can update correctly.
+  CDB.addFile("M.cppm", R"cpp(
+export module M;
+export constexpr int M = 43;
+  )cpp");
+
+  ParseInputs AUse = getInputs("A.cppm", CDB);
+  AUse.ModulesManager = &Builder;
+  std::unique_ptr<CompilerInvocation> AInvocation =
+      buildCompilerInvocation(AUse, DiagConsumer);
+  EXPECT_FALSE(AInfo->canReuse(*AInvocation, FS.view(TestDir)));
+
+  ParseInputs BUse = getInputs("B.cppm", CDB);
+  AUse.ModulesManager = &Builder;
+  std::unique_ptr<CompilerInvocation> BInvocation =
+      buildCompilerInvocation(BUse, DiagConsumer);
+  EXPECT_FALSE(BInfo->canReuse(*BInvocation, FS.view(TestDir)));
+
+  auto NewAInfo =
+      Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
+  auto NewBInfo =
+      Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS);
+  EXPECT_TRUE(NewAInfo);
+  EXPECT_TRUE(NewBInfo);
+  HeaderSearchOptions NewHSOptsA(TestDir);
+  HeaderSearchOptions NewHSOptsB(TestDir);
+  NewAInfo->adjustHeaderSearchOptions(NewHSOptsA);
+  NewBInfo->adjustHeaderSearchOptions(NewHSOptsB);
+
+  EXPECT_FALSE(NewHSOptsA.PrebuiltModuleFiles.empty());
+  EXPECT_FALSE(NewHSOptsB.PrebuiltModuleFiles.empty());
+
+  EXPECT_EQ(NewHSOptsA.PrebuiltModuleFiles, NewHSOptsB.PrebuiltModuleFiles);
+  // Check that we didn't reuse the old and stale module files.
+  EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles);
 }
 
 } // namespace

>From 370b01fa407c7bf768c490982610529cccedea8f Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Wed, 6 Nov 2024 13:24:57 +0800
Subject: [PATCH 4/7] Address comments

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 169 ++++++++++++--------
 1 file changed, 100 insertions(+), 69 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 73186f3b5ce7fc..24a6c35e97b2d0 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -95,9 +95,47 @@ class FailedPrerequisiteModules : public PrerequisiteModules {
   }
 };
 
+struct ModuleFile;
+
+// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
+// the required modules are built successfully. All the module files
+// are owned by the modules builder.
+class ReusablePrerequisiteModules : public PrerequisiteModules {
+public:
+  ReusablePrerequisiteModules() = default;
+
+  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
+      default;
+  ReusablePrerequisiteModules &
+  operator=(const ReusablePrerequisiteModules &) = default;
+  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules
+  operator=(ReusablePrerequisiteModules &&) = delete;
+
+  ~ReusablePrerequisiteModules() override = default;
+
+  void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override;
+
+  bool canReuse(const CompilerInvocation &CI,
+                llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
+
+  bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
+    return BuiltModuleNames.contains(ModuleName);
+  }
+
+  void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile);
+
+private:
+  llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
+  // A helper class to speedup the query if a module is built.
+  llvm::StringSet<> BuiltModuleNames;
+};
+
 struct ModuleFile {
-  ModuleFile(StringRef ModuleName, PathRef ModuleFilePath)
-      : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {}
+  ModuleFile(StringRef ModuleName, PathRef ModuleFilePath,
+             const ReusablePrerequisiteModules &RequiredModuleFiles)
+      : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()),
+        RequiredModuleFiles(RequiredModuleFiles) {}
 
   ModuleFile() = delete;
 
@@ -133,8 +171,27 @@ struct ModuleFile {
 private:
   std::string ModuleName;
   std::string ModuleFilePath;
+
+  // The required module files. We need to share the ownership for required
+  // module files.
+  ReusablePrerequisiteModules RequiredModuleFiles;
 };
 
+void ReusablePrerequisiteModules::adjustHeaderSearchOptions(
+    HeaderSearchOptions &Options) const {
+  // Appending all built module files.
+  for (const auto &RequiredModule : RequiredModules)
+    Options.PrebuiltModuleFiles.insert_or_assign(
+        RequiredModule->getModuleName().str(),
+        RequiredModule->getModuleFilePath().str());
+}
+
+void ReusablePrerequisiteModules::addModuleFile(
+    std::shared_ptr<const ModuleFile> ModuleFile) {
+  BuiltModuleNames.insert(ModuleFile->getModuleName());
+  RequiredModules.emplace_back(std::move(ModuleFile));
+}
+
 bool IsModuleFileUpToDate(PathRef ModuleFilePath,
                           const PrerequisiteModules &RequisiteModules,
                           llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
@@ -199,48 +256,6 @@ bool IsModuleFilesUpToDate(
       });
 }
 
-// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
-// the required modules are built successfully. All the module files
-// are owned by the modules builder.
-class ReusablePrerequisiteModules : public PrerequisiteModules {
-public:
-  ReusablePrerequisiteModules() = default;
-
-  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &) = delete;
-  ReusablePrerequisiteModules
-  operator=(const ReusablePrerequisiteModules &) = delete;
-  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
-  ReusablePrerequisiteModules
-  operator=(ReusablePrerequisiteModules &&) = delete;
-
-  ~ReusablePrerequisiteModules() override = default;
-
-  void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
-    // Appending all built module files.
-    for (auto &RequiredModule : RequiredModules)
-      Options.PrebuiltModuleFiles.insert_or_assign(
-          RequiredModule->getModuleName().str(),
-          RequiredModule->getModuleFilePath().str());
-  }
-
-  bool canReuse(const CompilerInvocation &CI,
-                llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
-
-  bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
-    return BuiltModuleNames.contains(ModuleName);
-  }
-
-  void addModuleFile(std::shared_ptr<ModuleFile> BMI) {
-    BuiltModuleNames.insert(BMI->getModuleName());
-    RequiredModules.emplace_back(std::move(BMI));
-  }
-
-private:
-  llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules;
-  // A helper class to speedup the query if a module is built.
-  llvm::StringSet<> BuiltModuleNames;
-};
-
 /// Build a module file for module with `ModuleName`. The information of built
 /// module file are stored in \param BuiltModuleFiles.
 llvm::Expected<ModuleFile>
@@ -294,7 +309,7 @@ buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
   if (Clang->getDiagnostics().hasErrorOccurred())
     return llvm::createStringError("Compilation failed");
 
-  return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
+  return ModuleFile{ModuleName, Inputs.CompileCommand.Output, BuiltModuleFiles};
 }
 
 bool ReusablePrerequisiteModules::canReuse(
@@ -319,13 +334,13 @@ class ModuleFileCache {
                        ReusablePrerequisiteModules &RequiredModules);
   const GlobalCompilationDatabase &getCDB() const { return CDB; }
 
-  std::shared_ptr<ModuleFile>
+  std::shared_ptr<const ModuleFile>
   getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
                      const ThreadsafeFS &TFS,
                      PrerequisiteModules &BuiltModuleFiles);
 
-  void add(StringRef ModuleName, std::shared_ptr<ModuleFile> ModuleFile) {
-    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+  void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
+    std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
     ModuleFiles.insert_or_assign(ModuleName, ModuleFile);
   }
@@ -333,7 +348,7 @@ class ModuleFileCache {
 private:
   const GlobalCompilationDatabase &CDB;
 
-  llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles;
+  llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
   // Mutex to guard accesses to ModuleFiles.
   std::mutex ModuleFilesMutex;
 };
@@ -341,8 +356,10 @@ class ModuleFileCache {
 /// Collect the directly and indirectly required module names for \param
 /// ModuleName. The \param ModuleName is guaranteed to be the first element in
 /// \param ModuleNames.
-void getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName,
-                           llvm::SmallVector<StringRef> &ModuleNames) {
+llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
+                                                   StringRef ModuleName) {
+  llvm::SmallVector<StringRef> ModuleNames;
+
   std::queue<StringRef> Worklist;
   llvm::StringSet<> ModuleNamesSet;
   Worklist.push(ModuleName);
@@ -361,46 +378,60 @@ void getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName,
       if (!ModuleNamesSet.contains(RequiredModuleName))
         Worklist.push(RequiredModuleName);
   }
+
+  return ModuleNames;
 }
 
-std::shared_ptr<ModuleFile>
+std::shared_ptr<const ModuleFile>
 ModuleFileCache::getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
                                     const ThreadsafeFS &TFS,
                                     PrerequisiteModules &BuiltModuleFiles) {
   {
-    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+    std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+
+    auto Iter = ModuleFiles.find(ModuleName);
+    if (Iter == ModuleFiles.end())
+      return nullptr;
 
-    if (ModuleFiles.find(ModuleName) == ModuleFiles.end())
+    if (Iter->second.expired()) {
+      ModuleFiles.erase(Iter);
       return nullptr;
+    }
   }
 
-  llvm::SmallVector<StringRef> ModuleNames;
-  getAllRequiredModules(MDB, ModuleName, ModuleNames);
+  llvm::SmallVector<StringRef> ModuleNames =
+      getAllRequiredModules(MDB, ModuleName);
 
-  llvm::SmallVector<std::shared_ptr<ModuleFile>> RequiredModuleFiles;
+  llvm::SmallVector<std::shared_ptr<const ModuleFile>> RequiredModuleFiles;
 
   {
-    std::lock_guard<std::mutex> _(ModuleFilesMutex);
+    std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
     for (StringRef ModuleName : ModuleNames) {
       auto Iter = ModuleFiles.find(ModuleName);
       if (Iter == ModuleFiles.end())
         return nullptr;
 
-      RequiredModuleFiles.push_back(Iter->second);
+      if (Iter->second.expired()) {
+        ModuleFiles.erase(Iter);
+        return nullptr;
+      }
+
+      RequiredModuleFiles.push_back(Iter->second.lock());
     }
   }
 
-  if (RequiredModuleFiles.empty())
-    return nullptr;
+  assert(!RequiredModuleFiles.empty());
 
-  if (llvm::all_of(RequiredModuleFiles, [&](auto MF) {
-        return IsModuleFileUpToDate(MF->getModuleFilePath(), BuiltModuleFiles,
-                                    TFS.view(std::nullopt));
-      }))
-    return RequiredModuleFiles[0];
+  if (llvm::any_of(RequiredModuleFiles,
+                   [&](std::shared_ptr<const ModuleFile> MF) {
+                     return !IsModuleFileUpToDate(MF->getModuleFilePath(),
+                                                  BuiltModuleFiles,
+                                                  TFS.view(std::nullopt));
+                   }))
+    return nullptr;
 
-  return nullptr;
+  return RequiredModuleFiles[0];
 }
 } // namespace
 
@@ -443,7 +474,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
       return llvm::createStringError(
           llvm::formatv("Failed to build module {0}", RequiredModuleName));
 
-  if (std::shared_ptr<ModuleFile> Cached =
+  if (std::shared_ptr<const ModuleFile> Cached =
           Cache.getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
     log("Reusing module {0} from {1}", ModuleName, Cached->getModuleFilePath());
     BuiltModuleFiles.addModuleFile(Cached);
@@ -462,7 +493,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
     return Err;
 
   log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
-  auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF));
+  auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
   Cache.add(ModuleName, BuiltModuleFile);
   BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
 

>From 4e0f1e36360ee01a4deccf89b6e8ea53517220ea Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 12 Nov 2024 13:49:45 +0800
Subject: [PATCH 5/7] Address comments

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 266 ++++++++------------
 1 file changed, 109 insertions(+), 157 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 24a6c35e97b2d0..2b68335b493c87 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -95,47 +95,9 @@ class FailedPrerequisiteModules : public PrerequisiteModules {
   }
 };
 
-struct ModuleFile;
-
-// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
-// the required modules are built successfully. All the module files
-// are owned by the modules builder.
-class ReusablePrerequisiteModules : public PrerequisiteModules {
-public:
-  ReusablePrerequisiteModules() = default;
-
-  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
-      default;
-  ReusablePrerequisiteModules &
-  operator=(const ReusablePrerequisiteModules &) = default;
-  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
-  ReusablePrerequisiteModules
-  operator=(ReusablePrerequisiteModules &&) = delete;
-
-  ~ReusablePrerequisiteModules() override = default;
-
-  void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override;
-
-  bool canReuse(const CompilerInvocation &CI,
-                llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
-
-  bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
-    return BuiltModuleNames.contains(ModuleName);
-  }
-
-  void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile);
-
-private:
-  llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
-  // A helper class to speedup the query if a module is built.
-  llvm::StringSet<> BuiltModuleNames;
-};
-
 struct ModuleFile {
-  ModuleFile(StringRef ModuleName, PathRef ModuleFilePath,
-             const ReusablePrerequisiteModules &RequiredModuleFiles)
-      : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()),
-        RequiredModuleFiles(RequiredModuleFiles) {}
+  ModuleFile(StringRef ModuleName, PathRef ModuleFilePath)
+      : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {}
 
   ModuleFile() = delete;
 
@@ -171,26 +133,50 @@ struct ModuleFile {
 private:
   std::string ModuleName;
   std::string ModuleFilePath;
-
-  // The required module files. We need to share the ownership for required
-  // module files.
-  ReusablePrerequisiteModules RequiredModuleFiles;
 };
 
-void ReusablePrerequisiteModules::adjustHeaderSearchOptions(
-    HeaderSearchOptions &Options) const {
-  // Appending all built module files.
-  for (const auto &RequiredModule : RequiredModules)
-    Options.PrebuiltModuleFiles.insert_or_assign(
-        RequiredModule->getModuleName().str(),
-        RequiredModule->getModuleFilePath().str());
-}
+// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
+// the required modules are built successfully. All the module files
+// are owned by the modules builder.
+class ReusablePrerequisiteModules : public PrerequisiteModules {
+public:
+  ReusablePrerequisiteModules() = default;
 
-void ReusablePrerequisiteModules::addModuleFile(
-    std::shared_ptr<const ModuleFile> ModuleFile) {
-  BuiltModuleNames.insert(ModuleFile->getModuleName());
-  RequiredModules.emplace_back(std::move(ModuleFile));
-}
+  ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
+      default;
+  ReusablePrerequisiteModules &
+  operator=(const ReusablePrerequisiteModules &) = default;
+  ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
+  ReusablePrerequisiteModules
+  operator=(ReusablePrerequisiteModules &&) = delete;
+
+  ~ReusablePrerequisiteModules() override = default;
+
+  void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
+    // Appending all built module files.
+    for (const auto &RequiredModule : RequiredModules)
+      Options.PrebuiltModuleFiles.insert_or_assign(
+          RequiredModule->getModuleName().str(),
+          RequiredModule->getModuleFilePath().str());
+  }
+
+  bool canReuse(const CompilerInvocation &CI,
+                llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
+
+  bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
+    return BuiltModuleNames.contains(ModuleName);
+  }
+
+  void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) {
+    BuiltModuleNames.insert(ModuleFile->getModuleName());
+    RequiredModules.emplace_back(std::move(ModuleFile));
+  }
+
+private:
+  llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
+  // A helper class to speedup the query if a module is built.
+  llvm::StringSet<> BuiltModuleNames;
+};
 
 bool IsModuleFileUpToDate(PathRef ModuleFilePath,
                           const PrerequisiteModules &RequisiteModules,
@@ -309,7 +295,7 @@ buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
   if (Clang->getDiagnostics().hasErrorOccurred())
     return llvm::createStringError("Compilation failed");
 
-  return ModuleFile{ModuleName, Inputs.CompileCommand.Output, BuiltModuleFiles};
+  return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
 }
 
 bool ReusablePrerequisiteModules::canReuse(
@@ -327,17 +313,9 @@ bool ReusablePrerequisiteModules::canReuse(
 class ModuleFileCache {
 public:
   ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
-
-  llvm::Error
-  getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
-                       ProjectModules &MDB,
-                       ReusablePrerequisiteModules &RequiredModules);
   const GlobalCompilationDatabase &getCDB() const { return CDB; }
 
-  std::shared_ptr<const ModuleFile>
-  getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
-                     const ThreadsafeFS &TFS,
-                     PrerequisiteModules &BuiltModuleFiles);
+  std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName);
 
   void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
     std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
@@ -345,6 +323,8 @@ class ModuleFileCache {
     ModuleFiles.insert_or_assign(ModuleName, ModuleFile);
   }
 
+  void remove(StringRef ModuleName);
+
 private:
   const GlobalCompilationDatabase &CDB;
 
@@ -353,86 +333,55 @@ class ModuleFileCache {
   std::mutex ModuleFilesMutex;
 };
 
-/// Collect the directly and indirectly required module names for \param
-/// ModuleName. The \param ModuleName is guaranteed to be the first element in
-/// \param ModuleNames.
-llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
-                                                   StringRef ModuleName) {
-  llvm::SmallVector<StringRef> ModuleNames;
-
-  std::queue<StringRef> Worklist;
-  llvm::StringSet<> ModuleNamesSet;
-  Worklist.push(ModuleName);
-
-  while (!Worklist.empty()) {
-    StringRef CurrentModule = Worklist.front();
-    Worklist.pop();
-
-    if (!ModuleNamesSet.insert(CurrentModule).second)
-      continue;
-
-    ModuleNames.push_back(CurrentModule);
-
-    for (StringRef RequiredModuleName :
-         MDB.getRequiredModules(MDB.getSourceForModuleName(CurrentModule)))
-      if (!ModuleNamesSet.contains(RequiredModuleName))
-        Worklist.push(RequiredModuleName);
-  }
-
-  return ModuleNames;
-}
-
 std::shared_ptr<const ModuleFile>
-ModuleFileCache::getValidModuleFile(StringRef ModuleName, ProjectModules &MDB,
-                                    const ThreadsafeFS &TFS,
-                                    PrerequisiteModules &BuiltModuleFiles) {
-  {
-    std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+ModuleFileCache::getModule(StringRef ModuleName) {
+  std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
-    auto Iter = ModuleFiles.find(ModuleName);
-    if (Iter == ModuleFiles.end())
-      return nullptr;
+  auto Iter = ModuleFiles.find(ModuleName);
+  if (Iter == ModuleFiles.end())
+    return nullptr;
 
-    if (Iter->second.expired()) {
-      ModuleFiles.erase(Iter);
-      return nullptr;
-    }
+  if (Iter->second.expired()) {
+    ModuleFiles.erase(Iter);
+    return nullptr;
   }
 
-  llvm::SmallVector<StringRef> ModuleNames =
-      getAllRequiredModules(MDB, ModuleName);
+  return Iter->second.lock();
+}
 
-  llvm::SmallVector<std::shared_ptr<const ModuleFile>> RequiredModuleFiles;
+void ModuleFileCache::remove(StringRef ModuleName) {
+  std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
-  {
-    std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
+  auto Iter = ModuleFiles.find(ModuleName);
+  if (Iter == ModuleFiles.end())
+    return;
 
-    for (StringRef ModuleName : ModuleNames) {
-      auto Iter = ModuleFiles.find(ModuleName);
-      if (Iter == ModuleFiles.end())
-        return nullptr;
+  ModuleFiles.erase(Iter);
+}
 
-      if (Iter->second.expired()) {
-        ModuleFiles.erase(Iter);
-        return nullptr;
-      }
+/// Collect the directly and indirectly required module names for \param
+/// ModuleName in topological order. The \param ModuleName is guaranteed to
+/// be the last element in \param ModuleNames.
+llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
+                                                   StringRef ModuleName) {
+  llvm::SmallVector<StringRef> ModuleNames;
+  llvm::StringSet<> ModuleNamesSet;
 
-      RequiredModuleFiles.push_back(Iter->second.lock());
-    }
-  }
+  auto traversal = [&](StringRef ModuleName, auto recursionHelper) -> void {
+    ModuleNamesSet.insert(ModuleName);
 
-  assert(!RequiredModuleFiles.empty());
+    for (StringRef RequiredModuleName :
+         MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
+      if (ModuleNamesSet.insert(RequiredModuleName).second)
+        recursionHelper(RequiredModuleName, recursionHelper);
 
-  if (llvm::any_of(RequiredModuleFiles,
-                   [&](std::shared_ptr<const ModuleFile> MF) {
-                     return !IsModuleFileUpToDate(MF->getModuleFilePath(),
-                                                  BuiltModuleFiles,
-                                                  TFS.view(std::nullopt));
-                   }))
-    return nullptr;
+    ModuleNames.push_back(ModuleName);
+  };
+  traversal(ModuleName, traversal);
 
-  return RequiredModuleFiles[0];
+  return ModuleNames;
 }
+
 } // namespace
 
 class ModulesBuilder::ModulesBuilderImpl {
@@ -468,34 +417,37 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
     return llvm::createStringError(
         llvm::formatv("Don't get the module unit for module {0}", ModuleName));
 
-  for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName))
-    // Return early if there are errors building the module file.
-    if (!getOrBuildModuleFile(RequiredModuleName, TFS, MDB, BuiltModuleFiles))
-      return llvm::createStringError(
-          llvm::formatv("Failed to build module {0}", RequiredModuleName));
-
-  if (std::shared_ptr<const ModuleFile> Cached =
-          Cache.getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) {
-    log("Reusing module {0} from {1}", ModuleName, Cached->getModuleFilePath());
-    BuiltModuleFiles.addModuleFile(Cached);
-    return llvm::Error::success();
-  }
+  // Get Required modules in topological order.
+  auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
+  for (llvm::StringRef ReqModuleName : ReqModuleNames) {
+    if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
+      continue;
 
-  log("Building module {0}", ModuleName);
+    if (auto Cached = Cache.getModule(ReqModuleName)) {
+      if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles,
+                               TFS.view(std::nullopt))) {
+        log("Reusing module {0} from {1}", ModuleName,
+            Cached->getModuleFilePath());
+        BuiltModuleFiles.addModuleFile(std::move(Cached));
+        continue;
+      }
+      Cache.remove(ReqModuleName);
+    }
 
-  llvm::SmallString<256> ModuleFilesPrefix =
-      getUniqueModuleFilesPath(ModuleUnitFileName);
+    llvm::SmallString<256> ModuleFilesPrefix =
+        getUniqueModuleFilesPath(ModuleUnitFileName);
 
-  llvm::Expected<ModuleFile> MF =
-      buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS,
-                      ModuleFilesPrefix, BuiltModuleFiles);
-  if (llvm::Error Err = MF.takeError())
-    return Err;
+    llvm::Expected<ModuleFile> MF =
+        buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS,
+                        ModuleFilesPrefix, BuiltModuleFiles);
+    if (llvm::Error Err = MF.takeError())
+      return Err;
 
-  log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
-  auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
-  Cache.add(ModuleName, BuiltModuleFile);
-  BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
+    log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
+    auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
+    Cache.add(ModuleName, BuiltModuleFile);
+    BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
+  }
 
   return llvm::Error::success();
 }

>From 821dc8dcbf6dffee3d61e0c435a45ba42bb71597 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:23:13 +0800
Subject: [PATCH 6/7] address comments

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 44 +++++++--------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 2b68335b493c87..5c581fdafcee2e 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -247,7 +247,6 @@ bool IsModuleFilesUpToDate(
 llvm::Expected<ModuleFile>
 buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
                 const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
-                PathRef ModuleFilesPrefix,
                 const ReusablePrerequisiteModules &BuiltModuleFiles) {
   // Try cheap operation earlier to boil-out cheaply if there are problems.
   auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
@@ -255,6 +254,9 @@ buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
     return llvm::createStringError(
         llvm::formatv("No compile command for {0}", ModuleUnitFileName));
 
+  llvm::SmallString<256> ModuleFilesPrefix =
+        getUniqueModuleFilesPath(ModuleUnitFileName);
+
   Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
 
   ParseInputs Inputs;
@@ -304,7 +306,7 @@ bool ReusablePrerequisiteModules::canReuse(
   if (RequiredModules.empty())
     return true;
 
-  SmallVector<StringRef> BMIPaths;
+  llvm::SmallVector<llvm::StringRef> BMIPaths;
   for (auto &MF : RequiredModules)
     BMIPaths.push_back(MF->getModuleFilePath());
   return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
@@ -320,7 +322,7 @@ class ModuleFileCache {
   void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
     std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
-    ModuleFiles.insert_or_assign(ModuleName, ModuleFile);
+    ModuleFiles[ModuleName] = ModuleFile;
   }
 
   void remove(StringRef ModuleName);
@@ -341,22 +343,17 @@ ModuleFileCache::getModule(StringRef ModuleName) {
   if (Iter == ModuleFiles.end())
     return nullptr;
 
-  if (Iter->second.expired()) {
-    ModuleFiles.erase(Iter);
-    return nullptr;
-  }
+  if (auto Res = Iter->second.lock())
+    return Res;
 
-  return Iter->second.lock();
+  ModuleFiles.erase(Iter);
+  return nullptr;
 }
 
 void ModuleFileCache::remove(StringRef ModuleName) {
   std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
 
-  auto Iter = ModuleFiles.find(ModuleName);
-  if (Iter == ModuleFiles.end())
-    return;
-
-  ModuleFiles.erase(Iter);
+  ModuleFiles.erase(ModuleName);
 }
 
 /// Collect the directly and indirectly required module names for \param
@@ -364,20 +361,20 @@ void ModuleFileCache::remove(StringRef ModuleName) {
 /// be the last element in \param ModuleNames.
 llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
                                                    StringRef ModuleName) {
-  llvm::SmallVector<StringRef> ModuleNames;
+  llvm::SmallVector<llvm::StringRef> ModuleNames;
   llvm::StringSet<> ModuleNamesSet;
 
-  auto traversal = [&](StringRef ModuleName, auto recursionHelper) -> void {
+  auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
     ModuleNamesSet.insert(ModuleName);
 
     for (StringRef RequiredModuleName :
          MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
       if (ModuleNamesSet.insert(RequiredModuleName).second)
-        recursionHelper(RequiredModuleName, recursionHelper);
+        Visitor(RequiredModuleName, Visitor);
 
     ModuleNames.push_back(ModuleName);
   };
-  traversal(ModuleName, traversal);
+  VisitDeps(ModuleName, VisitDeps);
 
   return ModuleNames;
 }
@@ -434,12 +431,9 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
       Cache.remove(ReqModuleName);
     }
 
-    llvm::SmallString<256> ModuleFilesPrefix =
-        getUniqueModuleFilesPath(ModuleUnitFileName);
-
     llvm::Expected<ModuleFile> MF =
         buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS,
-                        ModuleFilesPrefix, BuiltModuleFiles);
+                        BuiltModuleFiles);
     if (llvm::Error Err = MF.takeError())
       return Err;
 
@@ -465,13 +459,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
   if (RequiredModuleNames.empty())
     return std::make_unique<ReusablePrerequisiteModules>();
 
-  llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(File);
-
-  log("Trying to build required modules for {0} in {1}", File,
-      ModuleFilesPrefix);
-
   auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
-
   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
     // Return early if there is any error.
     if (llvm::Error Err = Impl->getOrBuildModuleFile(
@@ -482,8 +470,6 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
     }
   }
 
-  log("Built required modules for {0} in {1}", File, ModuleFilesPrefix);
-
   return std::move(RequiredModules);
 }
 

>From 483ee8d23a76081c9b7d5085ff716f19737a64ff Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 12 Nov 2024 17:29:54 +0800
Subject: [PATCH 7/7] fmt

---
 clang-tools-extra/clangd/ModulesBuilder.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp
index 5c581fdafcee2e..2bce3a20825616 100644
--- a/clang-tools-extra/clangd/ModulesBuilder.cpp
+++ b/clang-tools-extra/clangd/ModulesBuilder.cpp
@@ -255,7 +255,7 @@ buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
         llvm::formatv("No compile command for {0}", ModuleUnitFileName));
 
   llvm::SmallString<256> ModuleFilesPrefix =
-        getUniqueModuleFilesPath(ModuleUnitFileName);
+      getUniqueModuleFilesPath(ModuleUnitFileName);
 
   Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
 
@@ -431,9 +431,8 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
       Cache.remove(ReqModuleName);
     }
 
-    llvm::Expected<ModuleFile> MF =
-        buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS,
-                        BuiltModuleFiles);
+    llvm::Expected<ModuleFile> MF = buildModuleFile(
+        ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles);
     if (llvm::Error Err = MF.takeError())
       return Err;
 



More information about the cfe-commits mailing list