[clang] [clang][modules-driver] Add dependency scan and dependency graph (PR #152770)

Michael Spencer via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 29 11:37:18 PDT 2025


================
@@ -0,0 +1,1579 @@
+//===--- ModulesDriver.cpp - Driver managed module builds -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functionality to support driver managed builds for
+/// compilations which use Clang modules or standard C++20 named modules.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Driver/ModulesDriver.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticDriver.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Job.h"
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <atomic>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <optional>
+#include <tuple>
+#include <utility>
+
+using namespace llvm::opt;
+
+namespace clang::driver::modules {
+using JobVector = JobList::list_type;
+
+// The tooling::deps namespace has conflicting names with clang::driver, we
+// therefore introduce only the required tooling::deps namespace members into
+// this namespace.
+using tooling::dependencies::DependencyActionController;
+using tooling::dependencies::DependencyScanningService;
+using tooling::dependencies::DependencyScanningWorker;
+using tooling::dependencies::FullDependencyConsumer;
+using tooling::dependencies::ModuleDeps;
+using tooling::dependencies::ModuleDepsGraph;
+using tooling::dependencies::ModuleID;
+using tooling::dependencies::ModuleOutputKind;
+using tooling::dependencies::ScanningMode;
+using tooling::dependencies::ScanningOutputFormat;
+using tooling::dependencies::TranslationUnitDeps;
+
+/// Checks whether at least one entry of \c Inputs has the c++-module type.
+///
+/// \returns true as soon as an input of type \c types::TY_CXXModule is found,
+/// false if there is none.
+static bool hasCXXNamedModuleInput(const InputList &Inputs) {
+  for (const auto &Input : Inputs)
+    if (Input.first == types::TY_CXXModule)
+      return true;
+  return false;
+}
+
+/// Looks for C++20 named module usage in the C++ source inputs.
+///
+/// Every input whose type satisfies \c types::isCXX is read through \c VFS and
+/// handed to the dependency directives scanner. The search stops at the first
+/// file in which module usage is detected, and a remark naming that file is
+/// reported through \c Diags.
+///
+/// \returns true if module usage is detected, false otherwise, or a
+/// llvm::FileError on read failure.
+static Expected<bool> scanForCXXNamedModuleUsage(const InputList &Inputs,
+                                                 llvm::vfs::FileSystem &VFS,
+                                                 DiagnosticsEngine &Diags) {
+  for (const InputTy &Input : Inputs) {
+    // Inputs that are not C++ sources are not scanned.
+    if (!types::isCXX(Input.first))
+      continue;
+
+    const auto Filename = Input.second->getSpelling();
+    auto BufferOrErr = VFS.getBufferForFile(Filename);
+    if (!BufferOrErr)
+      return llvm::createFileError(Filename, BufferOrErr.getError());
+    const auto Buffer = std::move(*BufferOrErr);
+
+    // Scan the buffer using the dependency directives scanner.
+    if (!clang::scanInputForCXXNamedModulesUsage(Buffer->getBuffer()))
+      continue;
+
+    Diags.Report(diag::remark_found_cxx20_module_usage) << Filename;
+    return true;
+  }
+  return false;
+}
+
+/// Decides whether the modules driver should be used for \c Inputs.
+///
+/// \returns false if there are fewer than two inputs, true if any input is of
+/// type c++-module, and otherwise the result of scanning the C++ sources for
+/// named module usage (an error if a source file cannot be read).
+Expected<bool> shouldUseModulesDriver(const InputList &Inputs,
+                                      llvm::vfs::FileSystem &FS,
+                                      DiagnosticsEngine &Diags) {
+  if (Inputs.size() >= 2) {
+    // An explicit c++-module input makes scanning the sources unnecessary.
+    if (hasCXXNamedModuleInput(Inputs))
+      return true;
+    return scanForCXXNamedModuleUsage(Inputs, FS, Diags);
+  }
+  return false;
+}
+
+/// Maps the optional "system-include-directories" key of \c Params into
+/// \c LocalArgs.SystemIncludeDirs.
+///
+/// \returns the result of the mapping.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::LocalModuleArgs &LocalArgs,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper Mapper(Params, P);
+  const bool Mapped = Mapper.mapOptional("system-include-directories",
+                                         LocalArgs.SystemIncludeDirs);
+  return Mapped;
+}
+
+/// Maps a single module entry of the manifest from \c Params into
+/// \c ModuleEntry.
+///
+/// The keys "is-std-library", "logical-name" and "source-path" are mapped in
+/// that order and mapping stops at the first one that fails;
+/// "local-arguments" is mapped last as an optional key.
+///
+/// \returns true if every mapping succeeded.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::Module &ModuleEntry,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper Mapper(Params, P);
+  if (!Mapper.map("is-std-library", ModuleEntry.IsStdlib))
+    return false;
+  if (!Mapper.map("logical-name", ModuleEntry.LogicalName))
+    return false;
+  if (!Mapper.map("source-path", ModuleEntry.SourcePath))
+    return false;
+  return Mapper.mapOptional("local-arguments", ModuleEntry.LocalArgs);
+}
+
+/// Maps the "modules" key of \c Params into \c Manifest.ModuleEntries.
+///
+/// \returns the result of the mapping.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest &Manifest, llvm::json::Path P) {
+  llvm::json::ObjectMapper Mapper(Params, P);
+  const bool Mapped = Mapper.map("modules", Manifest.ModuleEntries);
+  return Mapped;
+}
+
+/// Builds a StdModuleManifest from the JSON text in \c Buffer.
+///
+/// The source file paths listed in the manifest are relative to its own
+/// path; they are not modified here.
+///
+/// \returns the parsed manifest, the JSON parser's error if \c Buffer cannot
+/// be parsed, or the error recorded by the JSON path root if the parsed value
+/// cannot be mapped onto a manifest.
+static Expected<StdModuleManifest> parseStdModuleManifest(StringRef Buffer) {
+  auto JsonOrErr = llvm::json::parse(Buffer);
+  if (!JsonOrErr)
+    return JsonOrErr.takeError();
+
+  StdModuleManifest Result;
+  llvm::json::Path::Root PathRoot;
+  if (fromJSON(*JsonOrErr, Result, PathRoot))
+    return Result;
+
+  return PathRoot.getError();
+}
+
+/// Rewrites the relative file paths stored in \c Manifest so that they are
+/// resolved against the directory containing \c ManifestPath.
+///
+/// This covers each module's source path and, when local arguments are
+/// present, each of its system include directories. Paths that are already
+/// absolute are left untouched.
+static void makeStdModuleManifestPathsAbsolute(StdModuleManifest &Manifest,
+                                               StringRef ManifestPath) {
+  SmallString<124> BaseDir(ManifestPath);
+  llvm::sys::path::remove_filename(BaseDir);
+
+  // Scratch buffer shared by all invocations of the helper below.
+  SmallString<256> Scratch;
+  const auto Absolutize = [&BaseDir, &Scratch](std::string &Path) {
+    if (!llvm::sys::path::is_absolute(Path)) {
+      Scratch = BaseDir;
+      llvm::sys::path::append(Scratch, Path);
+      llvm::sys::path::remove_dots(Scratch, true);
+      Path = std::string(Scratch);
+    }
+  };
+
+  for (auto &Entry : Manifest.ModuleEntries) {
+    Absolutize(Entry.SourcePath);
+    if (Entry.LocalArgs) {
+      for (auto &Dir : Entry.LocalArgs->SystemIncludeDirs)
+        Absolutize(Dir);
+    }
+  }
+}
+
+/// Reads the Standard library module manifest at \c ManifestPath through
+/// \c VFS, parses it and resolves the paths it lists against the manifest's
+/// own location.
+///
+/// \returns the manifest, a llvm::FileError if the file cannot be read, or the
+/// parse error if its contents are not a valid manifest.
+Expected<StdModuleManifest> readStdModuleManifest(StringRef ManifestPath,
+                                                  llvm::vfs::FileSystem &VFS) {
+  auto BufferOrErr = VFS.getBufferForFile(ManifestPath);
+  if (!BufferOrErr)
+    return llvm::createFileError(ManifestPath, BufferOrErr.getError());
+
+  auto ManifestOrErr = parseStdModuleManifest((*BufferOrErr)->getBuffer());
+  if (!ManifestOrErr)
+    return ManifestOrErr.takeError();
+
+  // All paths in the manifest are relative to \c ManifestPath.
+  // Make them absolute.
+  makeStdModuleManifestPathsAbsolute(*ManifestOrErr, ManifestPath);
+  return ManifestOrErr;
+}
+
+/// Appends a compilation input for the given \c ModuleEntry of the Standard
+/// library module manifest.
+///
+/// A synthesized input argument whose value is \c ModuleEntry.SourcePath is
+/// added to the argument list of \c C, claimed, and recorded in \c Inputs with
+/// type \c types::TY_CXXModule.
+static void
+appendStdModuleManifestInput(const StdModuleManifest::Module &ModuleEntry,
+                             Compilation &C, InputList &Inputs) {
+  auto &Args = C.getArgs();
+  const auto &Opts = C.getDriver().getOpts();
+
+  // The return value is not inspected here.
+  // NOTE(review): presumably the driver diagnoses a missing file itself and
+  // the input is appended regardless - confirm.
+  C.getDriver().DiagnoseInputExistence(Args, ModuleEntry.SourcePath,
+                                       types::TY_CXXModule,
+                                       /*TypoCorrect=*/false);
+
+  // NOTE(review): presumably AddSynthesizedArg takes ownership of the newly
+  // allocated Arg - confirm.
+  auto *A = new Arg(Opts.getOption(options::OPT_INPUT), ModuleEntry.SourcePath,
+                    Args.getBaseArgs().MakeIndex(ModuleEntry.SourcePath),
+                    Args.getBaseArgs().MakeArgString(ModuleEntry.SourcePath));
+  Args.AddSynthesizedArg(A);
+  A->claim();
+  Inputs.emplace_back(types::TY_CXXModule, A);
+}
+
+/// Appends one compilation input to \c Inputs for every module entry listed in
+/// \c Manifest, in manifest order.
+void buildStdModuleManifestInputs(const StdModuleManifest &Manifest,
+                                  Compilation &C, InputList &Inputs) {
+  for (const auto &Entry : Manifest.ModuleEntries) {
+    appendStdModuleManifestInput(Entry, C, Inputs);
+  }
+}
+
+namespace {
+/// Represents a CharSourceRange within a StandaloneDiagnostic.
+///
+/// The range is stored as a pair of file offsets instead of source locations.
+struct SourceOffsetRange {
+  /// Computes the offsets of \c Range using \c SrcMgr and \c LangOpts.
+  SourceOffsetRange(CharSourceRange Range, const SourceManager &SrcMgr,
+                    const LangOptions &LangOpts);
+  /// File offset of the start of the range.
+  unsigned Begin = 0;
+  /// File offset of the end of the range.
+  unsigned End = 0;
+  /// Whether the range is to be interpreted as a token range when it is
+  /// converted back into a CharSourceRange.
+  bool IsTokenRange = false;
+};
+
+/// Represents a FixItHint within a StandaloneDiagnostic.
+///
+/// The members mirror the FixItHint members of the same name, with the source
+/// ranges stored as offset ranges.
+struct StandaloneFixIt {
+  /// Captures \c FixIt, converting its ranges with \c SrcMgr and \c LangOpts.
+  StandaloneFixIt(const SourceManager &SrcMgr, const LangOptions &LangOpts,
+                  const FixItHint &FixIt);
+
+  /// Offsets of the hint's RemoveRange.
+  SourceOffsetRange RemoveRange;
+  /// Offsets of the hint's InsertFromRange.
+  SourceOffsetRange InsertFromRange;
+  /// Copy of the hint's CodeToInsert.
+  std::string CodeToInsert;
+  /// Copy of the hint's BeforePreviousInsertions flag.
+  bool BeforePreviousInsertions = false;
+};
+
+/// Represents a StoredDiagnostic in a form that can be retained until after its
+/// SourceManager has been destroyed.
+///
+/// Source locations are stored as a combination of filename and offsets into
+/// that file.
+/// To report the diagnostic, it must first be translated back into a
+/// StoredDiagnostic with a new associated SourceManager.
+struct StandaloneDiagnostic {
+  /// Captures \c StoredDiag. For a diagnostic without a valid source location
+  /// only the level, ID and message are captured; all location related members
+  /// keep their default values.
+  explicit StandaloneDiagnostic(const StoredDiagnostic &StoredDiag);
+
+  /// Language options used when converting ranges to file offsets.
+  LangOptions LangOpts;
+  /// File characteristic of the file containing the diagnostic location.
+  ///
+  /// Default-initialized because the constructor returns early for diagnostics
+  /// without a source location; without an initializer, copying or moving such
+  /// an object would read an indeterminate value.
+  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+  /// Severity of the diagnostic.
+  DiagnosticsEngine::Level Level;
+  /// Diagnostic ID.
+  unsigned ID = 0;
+  /// Offset of the diagnostic location within \c Filename.
+  unsigned FileOffset = 0;
+  /// File containing the diagnostic location; empty if the diagnostic has no
+  /// source location.
+  std::string Filename;
+  /// The diagnostic message.
+  std::string Message;
+  /// Source ranges attached to the diagnostic.
+  SmallVector<SourceOffsetRange, 0> Ranges;
+  /// Fix-it hints attached to the diagnostic.
+  SmallVector<StandaloneFixIt, 0> FixIts;
+};
+
+using StandaloneDiagList = SmallVector<StandaloneDiagnostic, 0>;
+} // anonymous namespace
+
+/// Converts \c Range into a pair of file offsets.
+///
+/// \c Range is first mapped to a file character range with
+/// Lexer::makeFileCharRange; \c Begin and \c End are the file offsets of that
+/// range's end points.
+SourceOffsetRange::SourceOffsetRange(CharSourceRange Range,
+                                     const SourceManager &SrcMgr,
+                                     const LangOptions &LangOpts) {
+  const auto FileRange = Lexer::makeFileCharRange(Range, SrcMgr, LangOpts);
+  Begin = SrcMgr.getFileOffset(FileRange.getBegin());
+  End = SrcMgr.getFileOffset(FileRange.getEnd());
+  // The offsets describe FileRange, not Range. Record the kind of the range
+  // that was actually measured: pairing the offsets of the converted range
+  // with the token-range flag of the input would extend a token range by an
+  // additional token when it is translated back.
+  IsTokenRange = FileRange.isTokenRange();
+}
+
+/// Captures \c FixIt: both of its ranges are converted to offset ranges using
+/// \c SrcMgr and \c LangOpts, and the inserted code and the
+/// BeforePreviousInsertions flag are copied.
+StandaloneFixIt::StandaloneFixIt(const SourceManager &SrcMgr,
+                                 const LangOptions &LangOpts,
+                                 const FixItHint &FixIt)
+    : RemoveRange(FixIt.RemoveRange, SrcMgr, LangOpts),
+      InsertFromRange(FixIt.InsertFromRange, SrcMgr, LangOpts),
+      CodeToInsert(FixIt.CodeToInsert),
+      BeforePreviousInsertions(FixIt.BeforePreviousInsertions) {}
+
+/// Makes \c Filename independent of the working directory configured for
+/// \c SrcMgr.
+///
+/// \returns \c Filename unchanged if it is already absolute or if no working
+/// directory is set in the file system options of \c SrcMgr's file manager;
+/// otherwise \c Filename made absolute relative to that working directory.
+static std::string canonicalizeFilename(const SourceManager &SrcMgr,
+                                        StringRef Filename) {
+  SmallString<256> Path(Filename);
+  if (llvm::sys::path::is_absolute(Path))
+    return std::string(Path.str());
+
+  const auto &WorkingDir =
+      SrcMgr.getFileManager().getFileSystemOpts().WorkingDir;
+  if (!WorkingDir.empty())
+    llvm::sys::fs::make_absolute(WorkingDir, Path);
+  return std::string(Path.str());
+}
+
+/// Captures the level, ID and message of \c StoredDiag and, if it has a valid
+/// location, the file name, file offset, file characteristic, ranges and
+/// fix-its needed to recreate it without the original SourceManager.
+//
+// FIXME: LangOpts is not properly saved because the LangOptions is not
+// copyable! clang/lib/Frontend/SerializedDiagnosticPrinter.cpp does currently
+// not serialize LangOpts either.
+StandaloneDiagnostic::StandaloneDiagnostic(const StoredDiagnostic &StoredDiag)
+    : Level(StoredDiag.getLevel()), ID(StoredDiag.getID()),
+      Message(StoredDiag.getMessage()) {
+  const FullSourceLoc &DiagLoc = StoredDiag.getLocation();
+  // A diagnostic without a specific source location is represented by an
+  // invalid SourceLocation. Such a diagnostic is still valid; there is simply
+  // no location information to capture.
+  if (!DiagLoc.isValid())
+    return;
+
+  const SourceManager &SM = DiagLoc.getManager();
+  const SourceLocation RawLoc = DiagLoc;
+  FileKind = SM.getFileCharacteristic(RawLoc);
+
+  const SourceLocation FileLoc = SM.getFileLoc(RawLoc);
+  FileOffset = SM.getFileOffset(FileLoc);
+
+  const StringRef Path = SM.getFilename(FileLoc);
+  assert(!Path.empty() && "diagnostic with location has no source file?");
+  Filename = canonicalizeFilename(SM, Path);
+
+  const auto StoredRanges = StoredDiag.getRanges();
+  Ranges.reserve(StoredRanges.size());
+  for (const auto &StoredRange : StoredRanges)
+    Ranges.emplace_back(StoredRange, SM, LangOpts);
+
+  const auto StoredFixIts = StoredDiag.getFixIts();
+  FixIts.reserve(StoredFixIts.size());
+  for (const auto &StoredFixIt : StoredFixIts)
+    FixIts.emplace_back(SM, LangOpts, StoredFixIt);
+}
+
+/// Recreates a StoredDiagnostic from \c StandaloneDiag, expressing its
+/// location, ranges and fix-its in terms of \c SrcMgr.
+///
+/// If \c FileMgr has no file for the recorded filename, the result carries
+/// only the level, ID and message and has no source location.
+static StoredDiagnostic
+translateStandaloneDiag(FileManager &FileMgr, SourceManager &SrcMgr,
+                        StandaloneDiagnostic &&StandaloneDiag) {
+  const auto File = FileMgr.getOptionalFileRef(StandaloneDiag.Filename);
+  if (!File)
+    return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                            std::move(StandaloneDiag.Message));
+
+  const auto FID = SrcMgr.getOrCreateFileID(*File, StandaloneDiag.FileKind);
+  const auto StartOfFile = SrcMgr.getLocForStartOfFile(FID);
+  assert(StartOfFile.isValid() &&
+         "StandaloneDiagnostic should only use FilePath "
+         "for encoding a valid source location.");
+  const FullSourceLoc Loc(
+      StartOfFile.getLocWithOffset(StandaloneDiag.FileOffset), SrcMgr);
+
+  // Rebuilds a CharSourceRange from offsets relative to the start of the file.
+  const auto ToCharSourceRange = [&](const SourceOffsetRange &Range) {
+    const SourceRange Locs(StartOfFile.getLocWithOffset(Range.Begin),
+                           StartOfFile.getLocWithOffset(Range.End));
+    return CharSourceRange(Locs, Range.IsTokenRange);
+  };
+
+  SmallVector<CharSourceRange, 0> Ranges;
+  Ranges.reserve(StandaloneDiag.Ranges.size());
+  for (const auto &Range : StandaloneDiag.Ranges)
+    Ranges.push_back(ToCharSourceRange(Range));
+
+  SmallVector<FixItHint, 0> FixIts;
+  FixIts.reserve(StandaloneDiag.FixIts.size());
+  for (const auto &FixIt : StandaloneDiag.FixIts) {
+    FixItHint Hint;
+    Hint.RemoveRange = ToCharSourceRange(FixIt.RemoveRange);
+    Hint.InsertFromRange = ToCharSourceRange(FixIt.InsertFromRange);
+    Hint.CodeToInsert = std::string(FixIt.CodeToInsert);
+    Hint.BeforePreviousInsertions = FixIt.BeforePreviousInsertions;
+    FixIts.push_back(std::move(Hint));
+  }
+
+  return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                          StandaloneDiag.Message, Loc, Ranges, FixIts);
+}
+
+namespace {
+/// RAII utility to report StandaloneDiagnostics through a DiagnosticsEngine.
+///
+/// The driver's DiagnosticsEngine usually does not have a SourceManager at this
+/// point in building the compilation, in which case the StandaloneDiagReporter
+/// supplies its own.
+class StandaloneDiagReporter {
+public:
+  /// Creates a reporter for \c Diags. If \c Diags has no SourceManager, a
+  /// FileManager (with working directory ".") and a SourceManager owned by
+  /// this reporter are created.
+  explicit StandaloneDiagReporter(DiagnosticsEngine &Diags) : Diags(Diags) {
+    if (!Diags.hasSourceManager()) {
+      FileSystemOptions Opts;
+      Opts.WorkingDir = ".";
+      OwnedFileMgr = llvm::makeIntrusiveRefCnt<FileManager>(std::move(Opts));
+      OwnedSrcMgr =
+          llvm::makeIntrusiveRefCnt<SourceManager>(Diags, *OwnedFileMgr);
+    }
+  }
+
+  /// Emits \c StandaloneDiag using the associated DiagnosticsEngine.
+  ///
+  /// The report is bracketed by BeginSourceFile/EndSourceFile calls on the
+  /// engine's client.
+  void Report(StandaloneDiagnostic &&StandaloneDiag) const {
+    // Read LangOpts before StandaloneDiag is handed over as an rvalue, so that
+    // no member of a potentially moved-from object is accessed afterwards.
+    Diags.getClient()->BeginSourceFile(StandaloneDiag.LangOpts, nullptr);
+    const auto StoredDiag = translateStandaloneDiag(
+        getFileManager(), getSourceManager(), std::move(StandaloneDiag));
+    Diags.Report(StoredDiag);
+    Diags.getClient()->EndSourceFile();
+  }
+
+  /// Emits all diagnostics in \c StandaloneDiags using the associated
+  /// DiagnosticsEngine.
+  void Report(SmallVectorImpl<StandaloneDiagnostic> &&StandaloneDiags) const {
+    for (auto &StandaloneDiag : StandaloneDiags)
+      Report(std::move(StandaloneDiag));
+  }
+
+private:
+  /// The engine through which diagnostics are reported.
+  DiagnosticsEngine &Diags;
+  /// Set only if \c Diags had no SourceManager on construction.
+  IntrusiveRefCntPtr<FileManager> OwnedFileMgr;
+  /// Set only if \c Diags had no SourceManager on construction.
+  IntrusiveRefCntPtr<SourceManager> OwnedSrcMgr;
+
+  /// Returns the owned FileManager if there is one, otherwise the FileManager
+  /// of the engine's SourceManager.
+  FileManager &getFileManager() const {
+    if (OwnedFileMgr)
+      return *OwnedFileMgr;
+    return Diags.getSourceManager().getFileManager();
+  }
+
+  /// Returns the owned SourceManager if there is one, otherwise the engine's
+  /// SourceManager.
+  SourceManager &getSourceManager() const {
+    if (OwnedSrcMgr)
+      return *OwnedSrcMgr;
+    return Diags.getSourceManager();
+  }
+};
+
+/// DiagnosticConsumer that converts every diagnostic it receives into a
+/// StandaloneDiagnostic, so that the collected diagnostics can be retained
+/// until after their associated SourceManager is destroyed.
+class StandaloneDiagCollector : public DiagnosticConsumer {
+public:
+  /// Intentionally does nothing.
+  void BeginSourceFile(const LangOptions &LangOpts,
+                       const Preprocessor *PP = nullptr) override {}
+
+  /// Intentionally does nothing.
+  void EndSourceFile() override {}
+
+  /// Stores a standalone copy of the diagnostic, then forwards to the base
+  /// class implementation.
+  void HandleDiagnostic(DiagnosticsEngine::Level Level,
+                        const Diagnostic &Info) override {
+    StandaloneDiags.emplace_back(StoredDiagnostic(Level, Info));
+    DiagnosticConsumer::HandleDiagnostic(Level, Info);
+  }
+
+  /// Moves the diagnostics collected so far out of the collector.
+  StandaloneDiagList takeDiagnostics() { return std::move(StandaloneDiags); }
+
+private:
+  /// Diagnostics in the order they were received.
+  StandaloneDiagList StandaloneDiags;
+};
+} // anonymous namespace
+
+namespace {
+/// The full dependencies for a single compilation input.
+struct InputDependencies {
+  /// The identifier of the C++20 module this translation unit exports.
+  ///
+  /// If the translation unit is not a module then \c ID.ModuleName is empty.
+  ModuleID ID;
+
+  /// Whether this is a "system" module.
+  ///
+  /// Defaults to false so that a default-constructed object never carries an
+  /// indeterminate value.
+  bool IsSystem = false;
+
+  /// A collection of absolute paths to files that this translation unit
+  /// directly depends on, not including transitive dependencies.
+  std::vector<std::string> FileDeps;
+
+  /// A list of modules this translation unit directly depends on, not including
+  /// transitive dependencies.
+  ///
+  /// This may include modules with a different context hash when it can be
+  /// determined that the differences are benign for this compilation.
+  std::vector<ModuleID> ClangModuleDeps;
+
+  /// A list of the C++20 named modules this translation unit depends on.
+  std::vector<std::string> NamedModuleDeps;
+
+  /// The compiler invocation with modifications to properly import all Clang
+  /// module dependencies. Does not include argv[0].
+  std::vector<std::string> BuildArgs;
+};
+
+/// The full dependencies for each compilation input and for all discovered
+/// Clang modules.
+struct DependencyScanResult {
+  /// The full dependencies for each compilation input, in the same order as the
+  /// inputs.
+  ///
+  /// System module inputs that are not imported are represented as
+  /// std::nullopt.
+  llvm::SmallVector<std::optional<InputDependencies>> InputDeps;
+
+  /// The full Clang module dependencies for this compilation.
+  SmallVector<std::unique_ptr<ModuleDeps>> ClangModuleDeps;
+};
+
+/// Merges and deterministically orders scan results from multiple threads
+/// into a single DependencyScanResult.
+class ScanResultCollector {
+public:
+  explicit ScanResultCollector(size_t NumInputs) : InputDeps(NumInputs) {}
+
+  /// Adds the dependency scan result for the input at \c InputIndex.
+  ///
+  /// Thread safe, given that each index is written to exactly once.
+  void handleTUResult(TranslationUnitDeps &&TUDeps, bool IsSystem,
+                      size_t InputIndex);
+
+  /// Finalizes and takes the aggregated results.
+  ///
+  /// Not thread-safe.
+  DependencyScanResult takeScanResults();
+
+private:
+  /// Merges and deterministically orders Clang module dependencies.
+  class ClangModuleDepsCollector {
+  public:
+    void mergeGraph(ModuleDepsGraph &&Graph, size_t InputIndex);
+
+    SmallVector<std::unique_ptr<ModuleDeps>> takeOrderedModuleDeps();
+
+  private:
+    /// We need the output of dependency scan to be deterministic. However,
+    /// the dependency graph may contain two modules with the same name. How
----------------
Bigcheese wrote:

Since this is a merged graph from multiple inputs, having multiple modules with the same name is not an error. When doing multi-arch builds or offloading, you need different PCMs for each target arch.

https://github.com/llvm/llvm-project/pull/152770


More information about the cfe-commits mailing list