[clang] [clang][modules-driver] Add dependency scan and dependency graph (PR #152770)
Michael Spencer via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 29 11:37:18 PDT 2025
================
@@ -0,0 +1,1579 @@
+//===--- ModulesDriver.cpp - Driver managed module builds -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functionality to support driver managed builds for
+/// compilations which use Clang modules or standard C++20 named modules.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Driver/ModulesDriver.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticDriver.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Job.h"
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <atomic>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <optional>
+#include <tuple>
+#include <utility>
+
+using namespace llvm::opt;
+
+namespace clang::driver::modules {
+using JobVector = JobList::list_type;
+
+// The tooling::deps namespace has conflicting names with clang::driver, we
+// therefore introduce only the required tooling::deps namespace members into
+// this namespace.
+using tooling::dependencies::DependencyActionController;
+using tooling::dependencies::DependencyScanningService;
+using tooling::dependencies::DependencyScanningWorker;
+using tooling::dependencies::FullDependencyConsumer;
+using tooling::dependencies::ModuleDeps;
+using tooling::dependencies::ModuleDepsGraph;
+using tooling::dependencies::ModuleID;
+using tooling::dependencies::ModuleOutputKind;
+using tooling::dependencies::ScanningMode;
+using tooling::dependencies::ScanningOutputFormat;
+using tooling::dependencies::TranslationUnitDeps;
+
+/// Tells whether at least one entry of \c Inputs has the input type
+/// c++-module.
+static bool hasCXXNamedModuleInput(const InputList &Inputs) {
+  for (const auto &Input : Inputs)
+    if (Input.first == types::TY_CXXModule)
+      return true;
+  return false;
+}
+
+/// Reads every C++ input through \c VFS and runs the dependency directives
+/// scanner over it, stopping at the first input in which C++20 named module
+/// usage is detected.
+///
+/// A remark naming the file is emitted through \c Diags when usage is found.
+///
+/// \returns true if module usage is detected, false otherwise, or a
+/// llvm::FileError on read failure.
+static Expected<bool> scanForCXXNamedModuleUsage(const InputList &Inputs,
+                                                 llvm::vfs::FileSystem &VFS,
+                                                 DiagnosticsEngine &Diags) {
+  for (const InputTy &Input : Inputs) {
+    // Only C++ inputs are of interest.
+    if (!types::isCXX(Input.first))
+      continue;
+
+    auto Filename = Input.second->getSpelling();
+    auto BufferOrErr = VFS.getBufferForFile(Filename);
+    if (!BufferOrErr)
+      return llvm::createFileError(Filename, BufferOrErr.getError());
+    const auto Buffer = std::move(*BufferOrErr);
+
+    // Let the dependency directives scanner decide for this buffer.
+    if (!clang::scanInputForCXXNamedModulesUsage(Buffer->getBuffer()))
+      continue;
+
+    Diags.Report(diag::remark_found_cxx20_module_usage) << Filename;
+    return true;
+  }
+  return false;
+}
+
+/// Decides whether the modules driver should handle this compilation.
+///
+/// With fewer than two inputs the answer is always false. Otherwise the answer
+/// is true if an input has type c++-module, or if scanning the C++ inputs
+/// detects C++20 named module usage.
+///
+/// \returns the decision, or the error produced while reading an input.
+Expected<bool> shouldUseModulesDriver(const InputList &Inputs,
+                                      llvm::vfs::FileSystem &FS,
+                                      DiagnosticsEngine &Diags) {
+  if (Inputs.size() >= 2) {
+    if (hasCXXNamedModuleInput(Inputs))
+      return true;
+    return scanForCXXNamedModuleUsage(Inputs, FS, Diags);
+  }
+  return false;
+}
+
+/// Deserializes a StdModuleManifest::LocalModuleArgs from \c Params.
+///
+/// The only field read is the optional "system-include-directories".
+///
+/// \returns false if \c Params is not a JSON object or the field cannot be
+/// mapped; error details are reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::LocalModuleArgs &LocalArgs,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper has to be tested before any map call: it is invalid when
+  // Params is not an object, and mapping through it would then be a bug.
+  return O && O.mapOptional("system-include-directories",
+                            LocalArgs.SystemIncludeDirs);
+}
+
+/// Deserializes a StdModuleManifest::Module from \c Params.
+///
+/// "is-std-library", "logical-name" and "source-path" are required;
+/// "local-arguments" is optional.
+///
+/// \returns false if \c Params is not a JSON object or a field cannot be
+/// mapped; error details are reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest::Module &ModuleEntry,
+                     llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper has to be tested before any map call: it is invalid when
+  // Params is not an object, and mapping through it would then be a bug.
+  return O && O.map("is-std-library", ModuleEntry.IsStdlib) &&
+         O.map("logical-name", ModuleEntry.LogicalName) &&
+         O.map("source-path", ModuleEntry.SourcePath) &&
+         O.mapOptional("local-arguments", ModuleEntry.LocalArgs);
+}
+
+/// Deserializes a StdModuleManifest from \c Params by reading its required
+/// "modules" field.
+///
+/// \returns false if \c Params is not a JSON object or the field cannot be
+/// mapped; error details are reported through \c P.
+static bool fromJSON(const llvm::json::Value &Params,
+                     StdModuleManifest &Manifest, llvm::json::Path P) {
+  llvm::json::ObjectMapper O(Params, P);
+  // The mapper has to be tested before any map call: it is invalid when
+  // Params is not an object (e.g. the manifest's top-level value is an
+  // array), and mapping through it would then be a bug.
+  return O && O.map("modules", Manifest.ModuleEntries);
+}
+
+/// Builds a StdModuleManifest from the JSON text in \c Buffer.
+///
+/// The source file paths listed in the manifest are relative to its own
+/// path; they are returned as written.
+///
+/// \returns the manifest, the JSON parse error, or the error collected while
+/// mapping the JSON onto the manifest structure.
+static Expected<StdModuleManifest> parseStdModuleManifest(StringRef Buffer) {
+  auto JsonOrErr = llvm::json::parse(Buffer);
+  if (!JsonOrErr)
+    return JsonOrErr.takeError();
+
+  llvm::json::Path::Root PathRoot;
+  StdModuleManifest Parsed;
+  if (fromJSON(*JsonOrErr, Parsed, PathRoot))
+    return Parsed;
+  return PathRoot.getError();
+}
+
+/// Rewrites every non-absolute source path and system include directory in
+/// \c Manifest so that it is resolved against the directory that contains
+/// \c ManifestPath. Paths that are already absolute are left alone.
+static void makeStdModuleManifestPathsAbsolute(StdModuleManifest &Manifest,
+                                               StringRef ManifestPath) {
+  SmallString<124> BaseDir(ManifestPath);
+  llvm::sys::path::remove_filename(BaseDir);
+
+  // Scratch buffer shared by all path rewrites.
+  SmallString<256> Scratch;
+  const auto Absolutize = [&BaseDir, &Scratch](std::string &Path) {
+    if (!llvm::sys::path::is_absolute(Path)) {
+      Scratch = BaseDir;
+      llvm::sys::path::append(Scratch, Path);
+      llvm::sys::path::remove_dots(Scratch, /*remove_dot_dot=*/true);
+      Path = std::string(Scratch);
+    }
+  };
+
+  for (auto &Entry : Manifest.ModuleEntries) {
+    Absolutize(Entry.SourcePath);
+    if (Entry.LocalArgs)
+      for (auto &Dir : Entry.LocalArgs->SystemIncludeDirs)
+        Absolutize(Dir);
+  }
+}
+
+/// Loads the Standard library module manifest at \c ManifestPath through
+/// \c VFS, parses it, and makes the paths it lists absolute.
+///
+/// \returns the manifest, a llvm::FileError if the file cannot be read, or
+/// the error produced while parsing it.
+Expected<StdModuleManifest> readStdModuleManifest(StringRef ManifestPath,
+                                                  llvm::vfs::FileSystem &VFS) {
+  auto BufferOrErr = VFS.getBufferForFile(ManifestPath);
+  if (!BufferOrErr)
+    return llvm::createFileError(ManifestPath, BufferOrErr.getError());
+
+  auto Manifest = parseStdModuleManifest((*BufferOrErr)->getBuffer());
+  if (!Manifest)
+    return Manifest.takeError();
+
+  // The manifest lists paths relative to its own location; resolve them
+  // before handing the manifest out.
+  makeStdModuleManifestPathsAbsolute(*Manifest, ManifestPath);
+  return Manifest;
+}
+
+/// Appends a compilation input for the given \c ModuleEntry of the Standard
+/// library module manifest.
+///
+/// A new input argument for \c ModuleEntry.SourcePath is synthesized, handed
+/// to the compilation's argument list, claimed, and appended to \c Inputs with
+/// the input type c++-module.
+static void
+appendStdModuleManifestInput(const StdModuleManifest::Module &ModuleEntry,
+ Compilation &C, InputList &Inputs) {
+ auto &Args = C.getArgs();
+ const auto &Opts = C.getDriver().getOpts();
+
+ // Typo correction is turned off for this existence check.
+ C.getDriver().DiagnoseInputExistence(Args, ModuleEntry.SourcePath,
+ types::TY_CXXModule,
+ /*TypoCorrect=*/false);
+
+ // The raw pointer is passed to AddSynthesizedArg right below.
+ // NOTE(review): presumably the argument list takes ownership there - confirm.
+ auto *A = new Arg(Opts.getOption(options::OPT_INPUT), ModuleEntry.SourcePath,
+ Args.getBaseArgs().MakeIndex(ModuleEntry.SourcePath),
+ Args.getBaseArgs().MakeArgString(ModuleEntry.SourcePath));
+ Args.AddSynthesizedArg(A);
+ A->claim();
+ Inputs.emplace_back(types::TY_CXXModule, A);
+}
+
+/// Appends one compilation input to \c Inputs for every module entry of
+/// \c Manifest, in manifest order.
+void buildStdModuleManifestInputs(const StdModuleManifest &Manifest,
+                                  Compilation &C, InputList &Inputs) {
+  for (const auto &Entry : Manifest.ModuleEntries)
+    appendStdModuleManifestInput(Entry, C, Inputs);
+}
+
+namespace {
+/// Represents a CharSourceRange within a StandaloneDiagnostic.
+///
+/// The range is kept as a pair of file offsets instead of SourceLocations.
+struct SourceOffsetRange {
+ /// Computes the offsets for \c Range with the help of \c SrcMgr and
+ /// \c LangOpts.
+ SourceOffsetRange(CharSourceRange Range, const SourceManager &SrcMgr,
+ const LangOptions &LangOpts);
+ /// File offset of the beginning of the range.
+ unsigned Begin = 0;
+ /// File offset of the end of the range.
+ unsigned End = 0;
+ /// Token-range flag passed to CharSourceRange when the range is translated
+ /// back.
+ bool IsTokenRange = false;
+};
+
+/// Represents a FixItHint within a StandaloneDiagnostic.
+///
+/// Each member mirrors the FixItHint member of the same name; the two ranges
+/// are kept as file offsets.
+struct StandaloneFixIt {
+ /// Captures \c FixIt, converting its ranges with \c SrcMgr and \c LangOpts.
+ StandaloneFixIt(const SourceManager &SrcMgr, const LangOptions &LangOpts,
+ const FixItHint &FixIt);
+
+ /// Offsets of FixItHint::RemoveRange.
+ SourceOffsetRange RemoveRange;
+ /// Offsets of FixItHint::InsertFromRange.
+ SourceOffsetRange InsertFromRange;
+ /// Copy of FixItHint::CodeToInsert.
+ std::string CodeToInsert;
+ /// Copy of FixItHint::BeforePreviousInsertions.
+ bool BeforePreviousInsertions = false;
+};
+
+/// Represents a StoredDiagnostic in a form that can be retained until after its
+/// SourceManager has been destroyed.
+///
+/// Source locations are stored as a combination of filename and offsets into
+/// that file.
+/// To report the diagnostic, it must first be translated back into a
+/// StoredDiagnostic with a new associated SourceManager.
+struct StandaloneDiagnostic {
+  /// Captures \c StoredDiag. For a diagnostic without a valid location only
+  /// the level, ID and message are recorded; every other member keeps its
+  /// default.
+  explicit StandaloneDiagnostic(const StoredDiagnostic &StoredDiag);
+
+  /// Language options used when converting ranges and when reporting.
+  LangOptions LangOpts;
+  /// Characteristic of the file the diagnostic points into. Defaulted so that
+  /// location-less diagnostics never carry an indeterminate value when the
+  /// object is copied or moved.
+  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+  /// Severity of the diagnostic.
+  DiagnosticsEngine::Level Level = DiagnosticsEngine::Ignored;
+  /// Diagnostic ID.
+  unsigned ID = 0;
+  /// Offset of the diagnostic's location within \c Filename.
+  unsigned FileOffset = 0;
+  /// File the diagnostic points into; empty if it has no location.
+  std::string Filename;
+  /// Formatted diagnostic message.
+  std::string Message;
+  /// Source ranges attached to the diagnostic.
+  SmallVector<SourceOffsetRange, 0> Ranges;
+  /// Fix-it hints attached to the diagnostic.
+  SmallVector<StandaloneFixIt, 0> FixIts;
+};
+
+using StandaloneDiagList = SmallVector<StandaloneDiagnostic, 0>;
+} // anonymous namespace
+
+/// Normalizes \c Range to a range of file locations and records its end points
+/// as file offsets.
+SourceOffsetRange::SourceOffsetRange(CharSourceRange Range,
+                                     const SourceManager &SrcMgr,
+                                     const LangOptions &LangOpts) {
+  const auto FileRange = Lexer::makeFileCharRange(Range, SrcMgr, LangOpts);
+  // The offsets below describe FileRange, not Range, so the token-range flag
+  // has to come from FileRange as well. Lexer::makeFileCharRange yields a
+  // character range whose end already lies past the last token; keeping the
+  // flag of a token-range input would make the translated range reach one
+  // token too far.
+  IsTokenRange = FileRange.isTokenRange();
+  Begin = SrcMgr.getFileOffset(FileRange.getBegin());
+  End = SrcMgr.getFileOffset(FileRange.getEnd());
+}
+
+/// Captures \c FixIt: both of its ranges are converted to offset ranges using
+/// \c SrcMgr and \c LangOpts, the remaining two members are copied as they are.
+StandaloneFixIt::StandaloneFixIt(const SourceManager &SrcMgr,
+ const LangOptions &LangOpts,
+ const FixItHint &FixIt)
+ : RemoveRange(FixIt.RemoveRange, SrcMgr, LangOpts),
+ InsertFromRange(FixIt.InsertFromRange, SrcMgr, LangOpts),
+ CodeToInsert(FixIt.CodeToInsert),
+ BeforePreviousInsertions(FixIt.BeforePreviousInsertions) {}
+
+/// Makes \c Filename independent of the working directory configured for the
+/// FileManager of \c SrcMgr.
+///
+/// A relative \c Filename is made absolute against that working directory if
+/// one is set. In every other case the string is returned as it was passed in.
+static std::string canonicalizeFilename(const SourceManager &SrcMgr,
+                                        StringRef Filename) {
+  const auto &WorkingDir =
+      SrcMgr.getFileManager().getFileSystemOpts().WorkingDir;
+  if (llvm::sys::path::is_absolute(Filename) || WorkingDir.empty())
+    return std::string(Filename);
+
+  SmallString<256> Absolute(Filename);
+  llvm::sys::fs::make_absolute(WorkingDir, Absolute);
+  return std::string(Absolute.str());
+}
+
+// FIXME: LangOpts is not properly saved because the LangOptions is not
+// copyable! clang/lib/Frontend/SerializedDiagnosticPrinter.cpp does currently
+// not serialize LangOpts either.
+//
+/// Captures level, ID and message of \c StoredDiag and, if the diagnostic has
+/// a valid location, its file, file offset, ranges and fix-its.
+StandaloneDiagnostic::StandaloneDiagnostic(const StoredDiagnostic &StoredDiag)
+    : Level(StoredDiag.getLevel()), ID(StoredDiag.getID()),
+      Message(StoredDiag.getMessage()) {
+  const FullSourceLoc &DiagLoc = StoredDiag.getLocation();
+  // A missing location does not make the diagnostic invalid; it is how
+  // diagnostics without a specific SourceLocation are represented.
+  if (!DiagLoc.isValid())
+    return;
+
+  const auto &SrcMgr = DiagLoc.getManager();
+  const auto RawLoc = static_cast<SourceLocation>(DiagLoc);
+  FileKind = SrcMgr.getFileCharacteristic(RawLoc);
+
+  const auto FileLoc = SrcMgr.getFileLoc(RawLoc);
+  FileOffset = SrcMgr.getFileOffset(FileLoc);
+  const auto PathRef = SrcMgr.getFilename(FileLoc);
+  assert(!PathRef.empty() && "diagnostic with location has no source file?");
+  Filename = canonicalizeFilename(SrcMgr, PathRef);
+
+  const auto StoredRanges = StoredDiag.getRanges();
+  Ranges.reserve(StoredRanges.size());
+  for (const auto &StoredRange : StoredRanges)
+    Ranges.emplace_back(StoredRange, SrcMgr, LangOpts);
+
+  const auto StoredFixIts = StoredDiag.getFixIts();
+  FixIts.reserve(StoredFixIts.size());
+  for (const auto &StoredFixIt : StoredFixIts)
+    FixIts.emplace_back(SrcMgr, LangOpts, StoredFixIt);
+}
+
+/// Turns \c StandaloneDiag back into a StoredDiagnostic whose locations belong
+/// to \c SrcMgr, looking the file up through \c FileMgr.
+///
+/// If the file cannot be found through \c FileMgr, the result carries only the
+/// level, ID and message.
+static StoredDiagnostic
+translateStandaloneDiag(FileManager &FileMgr, SourceManager &SrcMgr,
+                        StandaloneDiagnostic &&StandaloneDiag) {
+  const auto File = FileMgr.getOptionalFileRef(StandaloneDiag.Filename);
+  if (!File)
+    return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                            std::move(StandaloneDiag.Message));
+
+  const auto FID = SrcMgr.getOrCreateFileID(*File, StandaloneDiag.FileKind);
+  const auto FileLoc = SrcMgr.getLocForStartOfFile(FID);
+  assert(FileLoc.isValid() && "StandaloneDiagnostic should only use FilePath "
+                              "for encoding a valid source location.");
+  const FullSourceLoc Loc(FileLoc.getLocWithOffset(StandaloneDiag.FileOffset),
+                          SrcMgr);
+
+  // Rebuilds a CharSourceRange from offsets relative to the start of the file.
+  const auto ToCharRange = [&FileLoc](const SourceOffsetRange &Offsets) {
+    const auto BeginLoc = FileLoc.getLocWithOffset(Offsets.Begin);
+    const auto EndLoc = FileLoc.getLocWithOffset(Offsets.End);
+    return CharSourceRange(SourceRange(BeginLoc, EndLoc),
+                           Offsets.IsTokenRange);
+  };
+
+  SmallVector<CharSourceRange, 0> Ranges;
+  Ranges.reserve(StandaloneDiag.Ranges.size());
+  for (const auto &Offsets : StandaloneDiag.Ranges)
+    Ranges.push_back(ToCharRange(Offsets));
+
+  SmallVector<FixItHint, 0> FixIts;
+  FixIts.reserve(StandaloneDiag.FixIts.size());
+  for (const auto &Standalone : StandaloneDiag.FixIts) {
+    FixItHint Hint;
+    Hint.CodeToInsert = std::string(Standalone.CodeToInsert);
+    Hint.RemoveRange = ToCharRange(Standalone.RemoveRange);
+    Hint.InsertFromRange = ToCharRange(Standalone.InsertFromRange);
+    Hint.BeforePreviousInsertions = Standalone.BeforePreviousInsertions;
+    FixIts.push_back(std::move(Hint));
+  }
+
+  return StoredDiagnostic(StandaloneDiag.Level, StandaloneDiag.ID,
+                          StandaloneDiag.Message, Loc, Ranges, FixIts);
+}
+
+namespace {
+/// Reports StandaloneDiagnostics through a DiagnosticsEngine.
+///
+/// The driver's DiagnosticsEngine usually does not have a SourceManager at this
+/// point in building the compilation, in which case the StandaloneDiagReporter
+/// creates and owns a FileManager and SourceManager of its own.
+class StandaloneDiagReporter {
+public:
+  explicit StandaloneDiagReporter(DiagnosticsEngine &Diags) : Diags(Diags) {
+    if (Diags.hasSourceManager())
+      return;
+
+    FileSystemOptions Opts;
+    Opts.WorkingDir = ".";
+    OwnedFileMgr = llvm::makeIntrusiveRefCnt<FileManager>(std::move(Opts));
+    OwnedSrcMgr =
+        llvm::makeIntrusiveRefCnt<SourceManager>(Diags, *OwnedFileMgr);
+  }
+
+  /// Translates \c StandaloneDiag and emits it through the associated
+  /// DiagnosticsEngine, bracketed by BeginSourceFile/EndSourceFile calls on
+  /// the engine's client.
+  void Report(StandaloneDiagnostic &&StandaloneDiag) const {
+    const auto Translated = translateStandaloneDiag(
+        getFileManager(), getSourceManager(), std::move(StandaloneDiag));
+    // LangOpts is read after the std::move above. That works because the
+    // translation takes its argument by rvalue reference and moves nothing
+    // but the message out of it.
+    Diags.getClient()->BeginSourceFile(StandaloneDiag.LangOpts, nullptr);
+    Diags.Report(Translated);
+    Diags.getClient()->EndSourceFile();
+  }
+
+  /// Emits all diagnostics in \c StandaloneDiags, in order, using the
+  /// associated DiagnosticsEngine.
+  void Report(SmallVectorImpl<StandaloneDiagnostic> &&StandaloneDiags) const {
+    for (auto &Diag : StandaloneDiags)
+      Report(std::move(Diag));
+  }
+
+private:
+  DiagnosticsEngine &Diags;
+  IntrusiveRefCntPtr<FileManager> OwnedFileMgr;
+  IntrusiveRefCntPtr<SourceManager> OwnedSrcMgr;
+
+  /// The owned FileManager if there is one, else the one belonging to the
+  /// engine's SourceManager.
+  FileManager &getFileManager() const {
+    return OwnedFileMgr ? *OwnedFileMgr
+                        : Diags.getSourceManager().getFileManager();
+  }
+
+  /// The owned SourceManager if there is one, else the engine's.
+  SourceManager &getSourceManager() const {
+    return OwnedSrcMgr ? *OwnedSrcMgr : Diags.getSourceManager();
+  }
+};
+
+/// DiagnosticConsumer that converts every diagnostic it receives into a
+/// StandaloneDiagnostic, so that the diagnostics can be retained until after
+/// their associated SourceManager is destroyed.
+class StandaloneDiagCollector : public DiagnosticConsumer {
+public:
+  /// Intentionally a no-op.
+  void BeginSourceFile(const LangOptions &LangOpts,
+                       const Preprocessor *PP = nullptr) override {}
+
+  /// Intentionally a no-op.
+  void EndSourceFile() override {}
+
+  /// Records the diagnostic, then forwards it to the base class handler.
+  void HandleDiagnostic(DiagnosticsEngine::Level Level,
+                        const Diagnostic &Info) override {
+    const StoredDiagnostic Snapshot(Level, Info);
+    StandaloneDiags.emplace_back(Snapshot);
+    DiagnosticConsumer::HandleDiagnostic(Level, Info);
+  }
+
+  /// Hands out the diagnostics collected so far by moving them out of the
+  /// collector.
+  StandaloneDiagList takeDiagnostics() { return std::move(StandaloneDiags); }
+
+private:
+  StandaloneDiagList StandaloneDiags;
+};
+} // anonymous namespace
+
+namespace {
+/// The full dependencies for a single compilation input.
+struct InputDependencies {
+  /// The identifier of the C++20 module this translation unit exports.
+  ///
+  /// If the translation unit is not a module then \c ID.ModuleName is empty.
+  ModuleID ID;
+
+  /// Whether this is a "system" module.
+  ///
+  /// Initialized so that a default-constructed object never holds an
+  /// indeterminate value.
+  bool IsSystem = false;
+
+  /// A collection of absolute paths to files that this translation unit
+  /// directly depends on, not including transitive dependencies.
+  std::vector<std::string> FileDeps;
+
+  /// A list of modules this translation unit directly depends on, not including
+  /// transitive dependencies.
+  ///
+  /// This may include modules with a different context hash when it can be
+  /// determined that the differences are benign for this compilation.
+  std::vector<ModuleID> ClangModuleDeps;
+
+  /// A list of the C++20 named modules this translation unit depends on.
+  std::vector<std::string> NamedModuleDeps;
+
+  /// The compiler invocation with modifications to properly import all Clang
+  /// module dependencies. Does not include argv[0].
+  std::vector<std::string> BuildArgs;
+};
+
+/// The full dependencies for each compilation input and for all discovered
+/// Clang modules.
+struct DependencyScanResult {
+ /// The full dependencies for each compilation input, in the same order as the
+ /// inputs.
+ ///
+ /// System module inputs that are not imported are represented as
+ /// std::nullopt.
+ llvm::SmallVector<std::optional<InputDependencies>> InputDeps;
+
+ /// The full Clang module dependencies for this compilation.
+ SmallVector<std::unique_ptr<ModuleDeps>> ClangModuleDeps;
+};
+
+/// Merges and deterministically orders scan results from multiple threads
+/// into a single DependencyScanResult.
+class ScanResultCollector {
+public:
+ explicit ScanResultCollector(size_t NumInputs) : InputDeps(NumInputs) {}
+
+ /// Adds the dependency scan result for the input at \c InputIndex.
+ ///
+ /// Thread safe, given that each index is written to exactly once.
+ void handleTUResult(TranslationUnitDeps &&TUDeps, bool IsSystem,
+ size_t InputIndex);
+
+ /// Finalizes and takes the aggregated results.
+ ///
+ /// Not thread-safe.
+ DependencyScanResult takeScanResults();
+
+private:
+ /// Merges and deterministically orders Clang module dependencies.
+ class ClangModuleDepsCollector {
+ public:
+ void mergeGraph(ModuleDepsGraph &&Graph, size_t InputIndex);
+
+ SmallVector<std::unique_ptr<ModuleDeps>> takeOrderedModuleDeps();
+
+ private:
+ /// We need the output of dependency scan to be deterministic. However,
+ /// the dependency graph may contain two modules with the same name. How
----------------
Bigcheese wrote:
Since this is a merged graph from multiple inputs, having multiple modules with the same name is not an error. When doing multi-arch builds or offloading, you need different PCMs for each target arch.
https://github.com/llvm/llvm-project/pull/152770
More information about the cfe-commits
mailing list