[clang] [clang][modules-driver] Add dependency scan and dependency graph (PR #152770)

Naveen Seth Hanig via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 29 09:25:07 PDT 2025


================
@@ -0,0 +1,1579 @@
+//===--- ModulesDriver.cpp - Driver managed module builds -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functionality to support driver managed builds for
+/// compilations which use Clang modules or standard C++20 named modules.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Driver/ModulesDriver.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticDriver.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Job.h"
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <atomic>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <optional>
+#include <tuple>
+#include <utility>
+
+using namespace llvm::opt;
+
+namespace clang::driver::modules {
+using JobVector = JobList::list_type;
+
+// The tooling::dependencies namespace has names that conflict with
+// clang::driver. We therefore introduce only the required
+// tooling::dependencies members into this namespace.
+using tooling::dependencies::DependencyActionController;
+using tooling::dependencies::DependencyScanningService;
+using tooling::dependencies::DependencyScanningWorker;
+using tooling::dependencies::FullDependencyConsumer;
+using tooling::dependencies::ModuleDeps;
+using tooling::dependencies::ModuleDepsGraph;
+using tooling::dependencies::ModuleID;
+using tooling::dependencies::ModuleOutputKind;
+using tooling::dependencies::ScanningMode;
+using tooling::dependencies::ScanningOutputFormat;
+using tooling::dependencies::TranslationUnitDeps;
+
+/// Checks whether at least one of \c Inputs has the c++-module input type.
+static bool hasCXXNamedModuleInput(const InputList &Inputs) {
+  for (const auto &Entry : Inputs)
+    if (Entry.first == types::TY_CXXModule)
+      return true;
+  return false;
+}
+
+/// Looks for C++20 named module usage in the C++ source inputs.
+///
+/// Each C++ input is read through \c VFS and handed to the dependency
+/// directives scanner. The search stops at the first input in which module
+/// usage is found, and a remark naming that input is emitted via \c Diags.
+///
+/// \returns true if module usage is detected, false otherwise, or a
+/// llvm::FileError on read failure.
+static Expected<bool> scanForCXXNamedModuleUsage(const InputList &Inputs,
+                                                 llvm::vfs::FileSystem &VFS,
+                                                 DiagnosticsEngine &Diags) {
+  for (const InputTy &Input : Inputs) {
+    // Non-C++ inputs are not scanned at all.
+    if (!types::isCXX(Input.first))
+      continue;
+
+    const auto Path = Input.second->getSpelling();
+    auto BufferOrErr = VFS.getBufferForFile(Path);
+    if (!BufferOrErr)
+      return llvm::createFileError(Path, BufferOrErr.getError());
+    const auto Buffer = std::move(*BufferOrErr);
+
+    // Scan the buffer using the dependency directives scanner.
+    if (!clang::scanInputForCXXNamedModulesUsage(Buffer->getBuffer()))
+      continue;
+
+    Diags.Report(diag::remark_found_cxx20_module_usage) << Path;
+    return true;
+  }
+  return false;
+}
+
+/// Decides whether this compilation should be handled by the modules driver.
+///
+/// Compilations with fewer than two inputs never are. Otherwise the modules
+/// driver is used if an input has the c++-module type, or if scanning the C++
+/// inputs detects named module usage.
+///
+/// \returns the decision, or the error produced while scanning the inputs.
+Expected<bool> shouldUseModulesDriver(const InputList &Inputs,
+                                      llvm::vfs::FileSystem &FS,
+                                      DiagnosticsEngine &Diags) {
+  if (Inputs.size() < 2)
+    return false;
+
+  // Only read the input files when no input is explicitly typed as a module.
+  if (!hasCXXNamedModuleInput(Inputs))
+    return scanForCXXNamedModuleUsage(Inputs, FS, Diags);
+  return true;
+}
+
+/// Deserializes the "local-arguments" object of a Standard library module
+/// manifest entry into \c LocalArgs.
+///
+/// \returns false if \c Params is not a JSON object or the optional
+/// "system-include-directories" field cannot be mapped; error details are
+/// reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::LocalModuleArgs &LocalArgs,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper must be checked before use: it is invalid when Params is not
+  // an object, and the map functions must not be called on an invalid mapper.
+  return O && O.mapOptional("system-include-directories",
+                            LocalArgs.SystemIncludeDirs);
+}
+
+/// Deserializes one entry of the manifest's "modules" array into
+/// \c ModuleEntry.
+///
+/// "is-std-library", "logical-name" and "source-path" are required;
+/// "local-arguments" is optional.
+///
+/// \returns false if \c Params is not a JSON object or a field cannot be
+/// mapped; error details are reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::Module &ModuleEntry,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper must be checked before use: it is invalid when Params is not
+  // an object, and the map functions must not be called on an invalid mapper.
+  return O && O.map("is-std-library", ModuleEntry.IsStdlib) &&
+         O.map("logical-name", ModuleEntry.LogicalName) &&
+         O.map("source-path", ModuleEntry.SourcePath) &&
+         O.mapOptional("local-arguments", ModuleEntry.LocalArgs);
+}
+
+/// Deserializes the top-level object of a Standard library module manifest
+/// into \c Manifest.
+///
+/// \returns false if \c Params is not a JSON object or the required "modules"
+/// field cannot be mapped; error details are reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest &Manifest, llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper must be checked before use: it is invalid when Params is not
+  // an object, and the map functions must not be called on an invalid mapper.
+  return O && O.map("modules", Manifest.ModuleEntries);
+}
+
+/// Builds a StdModuleManifest from the JSON text in \c Buffer.
+///
+/// The source file paths listed in the manifest are relative to the
+/// manifest's own path; this function leaves them as written.
+///
+/// \returns the manifest, or an error if \c Buffer cannot be parsed as JSON or
+/// cannot be mapped onto a StdModuleManifest.
+static Expected<StdModuleManifest> parseStdModuleManifest(StringRef Buffer) {
+  auto Json = llvm::json::parse(Buffer);
+  if (!Json)
+    return Json.takeError();
+
+  // Mapping failures are recorded in ErrorRoot and surfaced as the error.
+  llvm::json::Path::Root ErrorRoot;
+  StdModuleManifest Result;
+  if (fromJSON(*Json, Result, ErrorRoot))
+    return Result;
+  return ErrorRoot.getError();
+}
+
+/// Rewrites every relative path stored in \c Manifest as an absolute path.
+///
+/// Relative source paths and system include directories are resolved against
+/// the directory containing \c ManifestPath and then passed through
+/// llvm::sys::path::remove_dots. Paths that are already absolute are left
+/// untouched.
+static void makeStdModuleManifestPathsAbsolute(StdModuleManifest &Manifest,
+                                               StringRef ManifestPath) {
+  SmallString<124> BaseDir(ManifestPath);
+  llvm::sys::path::remove_filename(BaseDir);
+
+  // Scratch buffer reused for every path that needs rewriting.
+  SmallString<256> Scratch;
+  const auto Absolutize = [&BaseDir, &Scratch](std::string &Path) {
+    if (!llvm::sys::path::is_absolute(Path)) {
+      Scratch = BaseDir;
+      llvm::sys::path::append(Scratch, Path);
+      llvm::sys::path::remove_dots(Scratch, true);
+      Path = std::string(Scratch);
+    }
+  };
+
+  for (auto &Entry : Manifest.ModuleEntries) {
+    Absolutize(Entry.SourcePath);
+    if (Entry.LocalArgs)
+      for (auto &Dir : Entry.LocalArgs->SystemIncludeDirs)
+        Absolutize(Dir);
+  }
+}
+
+/// Reads the Standard library module manifest at \c ManifestPath through
+/// \c VFS, parses it, and makes the paths it contains absolute.
+///
+/// \returns the manifest, a llvm::FileError if the file cannot be read, or the
+/// parse error if its contents are not a valid manifest.
+Expected<StdModuleManifest> readStdModuleManifest(StringRef ManifestPath,
+                                                  llvm::vfs::FileSystem &VFS) {
+  auto BufferOrErr = VFS.getBufferForFile(ManifestPath);
+  if (!BufferOrErr)
+    return llvm::createFileError(ManifestPath, BufferOrErr.getError());
+  const auto Buffer = std::move(*BufferOrErr);
+
+  auto Parsed = parseStdModuleManifest(Buffer->getBuffer());
+  if (!Parsed)
+    return Parsed.takeError();
+
+  // All paths in the manifest are relative to \c ManifestPath.
+  // Make them absolute.
+  StdModuleManifest Manifest = std::move(*Parsed);
+  makeStdModuleManifestPathsAbsolute(Manifest, ManifestPath);
+  return Manifest;
+}
+
+/// Appends a compilation input for \c ModuleEntry, an entry of the Standard
+/// library module manifest, to \c Inputs.
+///
+/// The input is a synthesized OPT_INPUT argument for the entry's source path.
+/// It is added to the compilation's arguments, claimed, and recorded with the
+/// c++-module input type.
+static void
+appendStdModuleManifestInput(const StdModuleManifest::Module &ModuleEntry,
+                             Compilation &C, InputList &Inputs) {
+  auto &Args = C.getArgs();
+  const auto &Opts = C.getDriver().getOpts();
+  const auto &SourcePath = ModuleEntry.SourcePath;
+
+  // NOTE(review): any result of DiagnoseInputExistence is discarded here, so
+  // the input is appended regardless of the check -- confirm this is intended.
+  C.getDriver().DiagnoseInputExistence(Args, SourcePath, types::TY_CXXModule,
+                                       /*TypoCorrect=*/false);
+
+  const auto &BaseArgs = Args.getBaseArgs();
+  auto *InputArg =
+      new Arg(Opts.getOption(options::OPT_INPUT), SourcePath,
+              BaseArgs.MakeIndex(SourcePath),
+              BaseArgs.MakeArgString(SourcePath));
+  Args.AddSynthesizedArg(InputArg);
+  InputArg->claim();
+  Inputs.emplace_back(types::TY_CXXModule, InputArg);
+}
+
+/// Appends one compilation input to \c Inputs for every module entry listed
+/// in \c Manifest, in manifest order.
+void buildStdModuleManifestInputs(const StdModuleManifest &Manifest,
+                                  Compilation &C, InputList &Inputs) {
+  for (const auto &Entry : Manifest.ModuleEntries) {
+    appendStdModuleManifestInput(Entry, C, Inputs);
+  }
+}
+
+namespace {
+/// Represents a CharSourceRange within a StandaloneDiagnostic.
+///
+/// The range is stored as a pair of file offsets rather than as
+/// SourceLocations.
+struct SourceOffsetRange {
+  SourceOffsetRange(CharSourceRange Range, const SourceManager &SrcMgr,
+                    const LangOptions &LangOpts);
+  /// File offset of the start of the range.
+  unsigned Begin = 0;
+  /// File offset of the end of the range.
+  unsigned End = 0;
+  /// Whether the original CharSourceRange was a token range.
+  bool IsTokenRange = false;
+};
+
+/// Represents a FixItHint within a StandaloneDiagnostic.
+struct StandaloneFixIt {
+  StandaloneFixIt(const SourceManager &SrcMgr, const LangOptions &LangOpts,
+                  const FixItHint &FixIt);
+
+  SourceOffsetRange RemoveRange;
+  SourceOffsetRange InsertFromRange;
+  std::string CodeToInsert;
+  bool BeforePreviousInsertions = false;
+};
+
+/// Represents a StoredDiagnostic in a form that can be retained until after its
+/// SourceManager has been destroyed.
+///
+/// Source locations are stored as a combination of filename and offsets into
+/// that file.
+/// To report the diagnostic, it must first be translated back into a
+/// StoredDiagnostic with a new associated SourceManager.
+struct StandaloneDiagnostic {
+  explicit StandaloneDiagnostic(const StoredDiagnostic &StoredDiag);
+
+  LangOptions LangOpts;
+  /// Only assigned by the constructor for diagnostics that carry a valid
+  /// source location. The default keeps the member initialized for
+  /// location-less diagnostics, which are still moved and copied around.
+  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+  DiagnosticsEngine::Level Level;
+  unsigned ID = 0;
+  unsigned FileOffset = 0;
+  /// Empty for diagnostics without a source location.
+  std::string Filename;
+  std::string Message;
+  SmallVector<SourceOffsetRange, 0> Ranges;
+  SmallVector<StandaloneFixIt, 0> FixIts;
+};
+
+using StandaloneDiagList = SmallVector<StandaloneDiagnostic, 0>;
+} // anonymous namespace
+
+/// Converts \c Range into file offsets.
+///
+/// The range is first mapped with Lexer::makeFileCharRange; the file offsets
+/// of the resulting begin and end locations are then recorded.
+// NOTE(review): an invalid \c Range presumably ends up as offsets 0/0, which
+// cannot be told apart from a valid range at the file start -- confirm.
+SourceOffsetRange::SourceOffsetRange(CharSourceRange Range,
+                                     const SourceManager &SrcMgr,
+                                     const LangOptions &LangOpts)
+    : IsTokenRange(Range.isTokenRange()) {
+  const CharSourceRange InFile =
+      Lexer::makeFileCharRange(Range, SrcMgr, LangOpts);
+  const SourceLocation First = InFile.getBegin();
+  const SourceLocation Last = InFile.getEnd();
+  Begin = SrcMgr.getFileOffset(First);
+  End = SrcMgr.getFileOffset(Last);
+}
+
+StandaloneFixIt::StandaloneFixIt(const SourceManager &SrcMgr,
+                                 const LangOptions &LangOpts,
+                                 const FixItHint &FixIt)
+    : RemoveRange(FixIt.RemoveRange, SrcMgr, LangOpts),
+      InsertFromRange(FixIt.InsertFromRange, SrcMgr, LangOpts),
+      CodeToInsert(FixIt.CodeToInsert),
+      BeforePreviousInsertions(FixIt.BeforePreviousInsertions) {}
+
+/// Returns \c Filename in a form that does not depend on the working
+/// directory configured for \c SrcMgr.
+///
+/// An absolute \c Filename is returned unchanged. A relative one is made
+/// absolute against the FileManager's working directory if one is set;
+/// otherwise the original string is returned.
+static std::string canonicalizeFilename(const SourceManager &SrcMgr,
+                                        StringRef Filename) {
+  SmallString<256> Result(Filename);
+  if (llvm::sys::path::is_absolute(Result))
+    return std::string(Result.str());
+
+  const auto &WorkingDir =
+      SrcMgr.getFileManager().getFileSystemOpts().WorkingDir;
+  if (!WorkingDir.empty())
+    llvm::sys::fs::make_absolute(WorkingDir, Result);
+  return std::string(Result.str());
+}
+
+/// Captures \c StoredDiag without keeping any reference to its SourceManager.
+///
+/// Level, ID and message are always copied. If the diagnostic has a valid
+/// location, the file kind, file offset, canonicalized filename, ranges and
+/// fix-its are recorded as well.
+// FIXME: LangOpts is not properly saved because the LangOptions is not
+// copyable! clang/lib/Frontend/SerializedDiagnosticPrinter.cpp does currently
+// not serialize LangOpts either.
+StandaloneDiagnostic::StandaloneDiagnostic(const StoredDiagnostic &StoredDiag)
+    : Level(StoredDiag.getLevel()), ID(StoredDiag.getID()),
+      Message(StoredDiag.getMessage()) {
+  const FullSourceLoc &DiagLoc = StoredDiag.getLocation();
+  // This is not an invalid diagnostic; invalid SourceLocations are used to
+  // represent diagnostics without a specific SourceLocation.
+  if (DiagLoc.isInvalid())
+    return;
+
+  const SourceManager &SM = DiagLoc.getManager();
+  const SourceLocation RawLoc = static_cast<SourceLocation>(DiagLoc);
+  FileKind = SM.getFileCharacteristic(RawLoc);
+
+  const SourceLocation InFileLoc = SM.getFileLoc(RawLoc);
+  FileOffset = SM.getFileOffset(InFileLoc);
+  const StringRef Path = SM.getFilename(InFileLoc);
+  assert(!Path.empty() && "diagnostic with location has no source file?");
+  Filename = canonicalizeFilename(SM, Path);
+
+  const auto StoredRanges = StoredDiag.getRanges();
+  Ranges.reserve(StoredRanges.size());
+  for (const auto &StoredRange : StoredRanges)
+    Ranges.emplace_back(StoredRange, SM, LangOpts);
+
+  const auto StoredFixIts = StoredDiag.getFixIts();
+  FixIts.reserve(StoredFixIts.size());
+  for (const auto &StoredFixIt : StoredFixIts)
+    FixIts.emplace_back(SM, LangOpts, StoredFixIt);
+}
+
+/// Rebuilds a StoredDiagnostic from \c StandaloneDiag, using \c FileMgr and
+/// \c SrcMgr to turn the recorded filename and offsets back into source
+/// locations.
+///
+/// If \c FileMgr cannot provide the recorded file, the result carries only
+/// the level, ID and message, without a location, ranges or fix-its.
+static StoredDiagnostic
+translateStandaloneDiag(FileManager &FileMgr, SourceManager &SrcMgr,
+                        StandaloneDiagnostic &&StandaloneDiag) {
+  const auto File = FileMgr.getOptionalFileRef(StandaloneDiag.Filename);
+  if (!File)
+    return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                            std::move(StandaloneDiag.Message));
+
+  const auto FID = SrcMgr.getOrCreateFileID(*File, StandaloneDiag.FileKind);
+  const auto FileStart = SrcMgr.getLocForStartOfFile(FID);
+  assert(FileStart.isValid() && "StandaloneDiagnostic should only use FilePath "
+                                "for encoding a valid source location.");
+
+  // All stored offsets are relative to the start of the file.
+  const auto ToCharRange = [&FileStart](const SourceOffsetRange &Offsets) {
+    const SourceRange Locs(FileStart.getLocWithOffset(Offsets.Begin),
+                           FileStart.getLocWithOffset(Offsets.End));
+    return CharSourceRange(Locs, Offsets.IsTokenRange);
+  };
+
+  SmallVector<CharSourceRange, 0> NewRanges;
+  NewRanges.reserve(StandaloneDiag.Ranges.size());
+  for (const auto &Offsets : StandaloneDiag.Ranges)
+    NewRanges.push_back(ToCharRange(Offsets));
+
+  SmallVector<FixItHint, 0> NewFixIts;
+  NewFixIts.reserve(StandaloneDiag.FixIts.size());
+  for (const auto &Stored : StandaloneDiag.FixIts) {
+    FixItHint Hint;
+    Hint.CodeToInsert = std::string(Stored.CodeToInsert);
+    Hint.RemoveRange = ToCharRange(Stored.RemoveRange);
+    Hint.InsertFromRange = ToCharRange(Stored.InsertFromRange);
+    Hint.BeforePreviousInsertions = Stored.BeforePreviousInsertions;
+    NewFixIts.push_back(std::move(Hint));
+  }
+
+  const FullSourceLoc Loc(FileStart.getLocWithOffset(StandaloneDiag.FileOffset),
+                          SrcMgr);
+  return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                          StandaloneDiag.Message, Loc, NewRanges, NewFixIts);
+}
+
+namespace {
+/// Utility to report StandaloneDiagnostics through a DiagnosticsEngine.
+///
+/// The driver's DiagnosticsEngine usually does not have a SourceManager at this
+/// point in building the compilation, in which case the StandaloneDiagReporter
+/// creates and owns a FileManager and SourceManager of its own.
+class StandaloneDiagReporter {
+public:
+  explicit StandaloneDiagReporter(DiagnosticsEngine &Diags) : Diags(Diags) {
+    if (Diags.hasSourceManager())
+      return;
+
+    FileSystemOptions FSOpts;
+    FSOpts.WorkingDir = ".";
+    OwnedFileMgr = llvm::makeIntrusiveRefCnt<FileManager>(std::move(FSOpts));
+    OwnedSrcMgr =
+        llvm::makeIntrusiveRefCnt<SourceManager>(Diags, *OwnedFileMgr);
+  }
+
+  /// Emits \c StandaloneDiag using the associated DiagnosticsEngine.
+  void Report(StandaloneDiagnostic &&StandaloneDiag) const {
+    FileManager &FM = getFileManager();
+    SourceManager &SM = getSourceManager();
+    const StoredDiagnostic Translated =
+        translateStandaloneDiag(FM, SM, std::move(StandaloneDiag));
+
+    // StandaloneDiag.LangOpts is read after the diagnostic was passed on as
+    // an rvalue; translateStandaloneDiag only ever moves from Message.
+    auto *Client = Diags.getClient();
+    Client->BeginSourceFile(StandaloneDiag.LangOpts, nullptr);
+    Diags.Report(Translated);
+    Client->EndSourceFile();
+  }
+
+  /// Emits all diagnostics in \c StandaloneDiags, in order, using the
+  /// associated DiagnosticsEngine.
+  void Report(SmallVectorImpl<StandaloneDiagnostic> &&StandaloneDiags) const {
+    for (auto &Diag : StandaloneDiags)
+      Report(std::move(Diag));
+  }
+
+private:
+  DiagnosticsEngine &Diags;
+  /// Only set if \c Diags had no SourceManager at construction.
+  IntrusiveRefCntPtr<FileManager> OwnedFileMgr;
+  /// Only set if \c Diags had no SourceManager at construction.
+  IntrusiveRefCntPtr<SourceManager> OwnedSrcMgr;
+
+  /// Returns the owned FileManager if there is one, otherwise the one used by
+  /// the SourceManager of \c Diags.
+  FileManager &getFileManager() const {
+    return OwnedFileMgr ? *OwnedFileMgr
+                        : Diags.getSourceManager().getFileManager();
+  }
+
+  /// Returns the owned SourceManager if there is one, otherwise the one
+  /// attached to \c Diags.
+  SourceManager &getSourceManager() const {
+    return OwnedSrcMgr ? *OwnedSrcMgr : Diags.getSourceManager();
+  }
+};
+
+/// Diagnostic consumer that stores every diagnostic it receives as a
+/// StandaloneDiagnostic, a form that can be retained until after the
+/// associated SourceManager is destroyed.
+class StandaloneDiagCollector : public DiagnosticConsumer {
+public:
+  /// Intentionally a no-op.
+  void BeginSourceFile(const LangOptions &LangOpts,
+                       const Preprocessor *PP = nullptr) override {}
+
+  /// Intentionally a no-op.
+  void EndSourceFile() override {}
+
+  /// Records the diagnostic, then forwards it to the base class
+  /// implementation.
+  void HandleDiagnostic(DiagnosticsEngine::Level Level,
+                        const Diagnostic &Info) override {
+    const StoredDiagnostic Stored(Level, Info);
+    Collected.emplace_back(Stored);
+    DiagnosticConsumer::HandleDiagnostic(Level, Info);
+  }
+
+  /// Hands the diagnostics collected so far over to the caller.
+  StandaloneDiagList takeDiagnostics() { return std::move(Collected); }
+
+private:
+  StandaloneDiagList Collected;
+};
+} // anonymous namespace
+
+namespace {
+/// The full dependencies for a single compilation input.
+struct InputDependencies {
+  /// The identifier of the C++20 module this translation unit exports.
+  ///
+  /// If the translation unit is not a module then \c ID.ModuleName is empty.
+  ModuleID ID;
+
+  /// Whether this is a "system" module.
+  ///
+  /// Initialized so that a default-constructed InputDependencies never holds
+  /// an indeterminate value.
+  bool IsSystem = false;
+
+  /// A collection of absolute paths to files that this translation unit
+  /// directly depends on, not including transitive dependencies.
+  std::vector<std::string> FileDeps;
+
+  /// A list of modules this translation unit directly depends on, not including
+  /// transitive dependencies.
+  ///
+  /// This may include modules with a different context hash when it can be
+  /// determined that the differences are benign for this compilation.
+  std::vector<ModuleID> ClangModuleDeps;
+
+  /// A list of the C++20 named modules this translation unit depends on.
+  std::vector<std::string> NamedModuleDeps;
+
+  /// The compiler invocation with modifications to properly import all Clang
+  /// module dependencies. Does not include argv[0].
+  std::vector<std::string> BuildArgs;
+};
+
+/// The outcome of a dependency scan: the full dependencies of every
+/// compilation input and of all discovered Clang modules.
+struct DependencyScanResult {
+  /// The full dependencies for each compilation input, in the same order as
+  /// the inputs.
+  ///
+  /// System module inputs that are not imported are represented as
+  /// std::nullopt.
+  SmallVector<std::optional<InputDependencies>> InputDeps;
+
+  /// The full Clang module dependencies for this compilation.
+  SmallVector<std::unique_ptr<ModuleDeps>> ClangModuleDeps;
+};
+
+/// Merges and deterministically orders scan results from multiple threads
+/// into a single DependencyScanResult.
+class ScanResultCollector {
+public:
+  explicit ScanResultCollector(size_t NumInputs) : InputDeps(NumInputs) {}
+
+  /// Adds the dependency scan result for the input at \c InputIndex.
+  ///
+  /// Thread safe, given that each index is written to exactly once.
+  void handleTUResult(TranslationUnitDeps &&TUDeps, bool IsSystem,
+                      size_t InputIndex);
+
+  /// Finalizes and takes the aggregated results.
+  ///
+  /// Not thread-safe.
+  DependencyScanResult takeScanResults();
+
+private:
+  /// Merges and deterministically orders Clang module dependencies.
+  class ClangModuleDepsCollector {
+  public:
+    void mergeGraph(ModuleDepsGraph &&Graph, size_t InputIndex);
+
+    SmallVector<std::unique_ptr<ModuleDeps>> takeOrderedModuleDeps();
+
+  private:
+    /// We need the output of dependency scan to be deterministic. However,
----------------
naveen-seth wrote:

I didn’t mean this as a FIXME, but just to explain why we are making the extra effort to sort by (module name, index of the first importing input) instead of (module name, module id).

The comment was just copied from `ClangScanDeps.cpp`, where the same sorting is done (but slightly different, since there we store the final results in a sorted vector rather than a map):

https://github.com/llvm/llvm-project/blob/f9dbf738d81492eb7891655c5d4b2f481215eee0/clang/tools/clang-scan-deps/ClangScanDeps.cpp#L589

Happy to adjust this if needed.

https://github.com/llvm/llvm-project/pull/152770


More information about the cfe-commits mailing list