[clang] [DependencyScanning] Add ability to scan TU with a buffer input (PR #125111)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 30 11:52:31 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Steven Wu (cachemeifyoucan)

<details>
<summary>Changes</summary>

Update the dependency scanner so that it can scan the dependencies of a TU
from a caller-provided memory buffer, rather than relying on the on-disk
file system to provide the input file.


---

Patch is 26.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125111.diff


7 Files Affected:

- (modified) clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h (+8-5) 
- (modified) clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h (+42-5) 
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp (+5-3) 
- (modified) clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp (+111-49) 
- (added) clang/test/ClangScanDeps/tu-with-modules.c (+111) 
- (modified) clang/tools/clang-scan-deps/ClangScanDeps.cpp (+26-10) 
- (modified) clang/tools/clang-scan-deps/Opts.td (+2) 


``````````diff
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index ddb078dc16e3cd..bcc9ea17e2588f 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -128,14 +128,17 @@ class DependencyScanningTool {
   /// \param LookupModuleOutput This function is called to fill in
   ///                           "-fmodule-file=", "-o" and other output
   ///                           arguments for dependencies.
+  /// \param TUBuffer Optional memory buffer for translation unit input. If
+  ///                 TUBuffer is nullopt, the input should already be
+  ///                 included in \p CommandLine.
   ///
   /// \returns a \c StringError with the diagnostic output if clang errors
   /// occurred, \c TranslationUnitDeps otherwise.
-  llvm::Expected<TranslationUnitDeps>
-  getTranslationUnitDependencies(const std::vector<std::string> &CommandLine,
-                                 StringRef CWD,
-                                 const llvm::DenseSet<ModuleID> &AlreadySeen,
-                                 LookupModuleOutputCallback LookupModuleOutput);
+  llvm::Expected<TranslationUnitDeps> getTranslationUnitDependencies(
+      const std::vector<std::string> &CommandLine, StringRef CWD,
+      const llvm::DenseSet<ModuleID> &AlreadySeen,
+      LookupModuleOutputCallback LookupModuleOutput,
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
 
   /// Given a compilation context specified via the Clang driver command-line,
   /// gather modular dependencies of module with the given name, and return the
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index da6e0401411a34..ee7582b8510208 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -17,6 +17,7 @@
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include <optional>
 #include <string>
 
@@ -83,9 +84,21 @@ class DependencyScanningWorker {
                            llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
 
   /// Run the dependency scanning tool for a given clang driver command-line,
-  /// and report the discovered dependencies to the provided consumer. If \p
-  /// ModuleName isn't empty, this function reports the dependencies of module
-  /// \p ModuleName.
+  /// and report the discovered dependencies to the provided consumer. If
+  /// TUBuffer is not nullopt, it is used as TU input for the dependency
+  /// scanning. Otherwise, the input should be included as part of the
+  /// command-line.
+  ///
+  /// \returns false if clang errors occurred (with diagnostics reported to
+  /// \c DiagConsumer), true otherwise.
+  bool computeDependencies(
+      StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+      DependencyConsumer &DepConsumer, DependencyActionController &Controller,
+      DiagnosticConsumer &DiagConsumer,
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific module.
   ///
   /// \returns false if clang errors occurred (with diagnostics reported to
   /// \c DiagConsumer), true otherwise.
@@ -94,13 +107,28 @@ class DependencyScanningWorker {
                            DependencyConsumer &DepConsumer,
                            DependencyActionController &Controller,
                            DiagnosticConsumer &DiagConsumer,
-                           std::optional<StringRef> ModuleName = std::nullopt);
+                           StringRef ModuleName);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific translation unit via file system or memory buffer.
+  ///
   /// \returns A \c StringError with the diagnostic output if clang errors
   /// occurred, success otherwise.
   llvm::Error computeDependencies(
       StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
       DependencyConsumer &Consumer, DependencyActionController &Controller,
-      std::optional<StringRef> ModuleName = std::nullopt);
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific module.
+  ///
+  /// \returns A \c StringError with the diagnostic output if clang errors
+  /// occurred, success otherwise.
+  llvm::Error computeDependencies(StringRef WorkingDirectory,
+                                  const std::vector<std::string> &CommandLine,
+                                  DependencyConsumer &Consumer,
+                                  DependencyActionController &Controller,
+                                  StringRef ModuleName);
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
@@ -121,6 +149,15 @@ class DependencyScanningWorker {
   ScanningOptimizations OptimizeArgs;
   /// Whether to set up command-lines to load PCM files eagerly.
   bool EagerLoadModules;
+
+  /// Private helper functions.
+  bool scanDependencies(StringRef WorkingDirectory,
+                        const std::vector<std::string> &CommandLine,
+                        DependencyConsumer &Consumer,
+                        DependencyActionController &Controller,
+                        DiagnosticConsumer &DC,
+                        llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+                        std::optional<StringRef> ModuleName);
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
index 4219f671658613..2b4c2bb76434ac 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
@@ -142,11 +142,13 @@ llvm::Expected<TranslationUnitDeps>
 DependencyScanningTool::getTranslationUnitDependencies(
     const std::vector<std::string> &CommandLine, StringRef CWD,
     const llvm::DenseSet<ModuleID> &AlreadySeen,
-    LookupModuleOutputCallback LookupModuleOutput) {
+    LookupModuleOutputCallback LookupModuleOutput,
+    std::optional<llvm::MemoryBufferRef> TUBuffer) {
   FullDependencyConsumer Consumer(AlreadySeen);
   CallbackActionController Controller(LookupModuleOutput);
-  llvm::Error Result =
-      Worker.computeDependencies(CWD, CommandLine, Consumer, Controller);
+  llvm::Error Result = Worker.computeDependencies(CWD, CommandLine, Consumer,
+                                                  Controller, TUBuffer);
+
   if (Result)
     return std::move(Result);
   return Consumer.takeTranslationUnitDeps();
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 5a648df05e4fd3..d15b74a28ab241 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -24,9 +24,11 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/TargetParser/Host.h"
 #include <optional>
 
@@ -521,20 +523,43 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
-llvm::Error DependencyScanningWorker::computeDependencies(
-    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
-    DependencyConsumer &Consumer, DependencyActionController &Controller,
-    std::optional<StringRef> ModuleName) {
+static std::unique_ptr<DiagnosticOptions>
+createDiagOptions(const std::vector<std::string> &CommandLine) {
   std::vector<const char *> CLI;
   for (const std::string &Arg : CommandLine)
     CLI.push_back(Arg.c_str());
   auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
   sanitizeDiagOpts(*DiagOpts);
+  return DiagOpts;
+}
+
+llvm::Error DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    std::optional<llvm::MemoryBufferRef> TUBuffer) {
+  // Capture the emitted diagnostics and report them to the client
+  // in the case of a failure.
+  std::string DiagnosticOutput;
+  llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
+  auto DiagOpts = createDiagOptions(CommandLine);
+  TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
 
+  if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
+                          DiagPrinter, TUBuffer))
+    return llvm::Error::success();
+  return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
+                                             llvm::inconvertibleErrorCode());
+}
+
+llvm::Error DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    StringRef ModuleName) {
   // Capture the emitted diagnostics and report them to the client
   // in the case of a failure.
   std::string DiagnosticOutput;
   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
+  auto DiagOpts = createDiagOptions(CommandLine);
   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
 
   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
@@ -604,54 +629,22 @@ static bool createAndRunToolInvocation(
   return true;
 }
 
-bool DependencyScanningWorker::computeDependencies(
+bool DependencyScanningWorker::scanDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
-    DiagnosticConsumer &DC, std::optional<StringRef> ModuleName) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  std::optional<std::vector<std::string>> ModifiedCommandLine;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
-
-  // If we're scanning based on a module name alone, we don't expect the client
-  // to provide us with an input file. However, the driver really wants to have
-  // one. Let's just make it up to make the driver happy.
-  if (ModuleName) {
-    auto OverlayFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-    auto InMemoryFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-    InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-    OverlayFS->pushOverlay(InMemoryFS);
-    ModifiedFS = OverlayFS;
-
-    SmallString<128> FakeInputPath;
-    // TODO: We should retry the creation if the path already exists.
-    llvm::sys::fs::createUniquePath(*ModuleName + "-%%%%%%%%.input",
-                                    FakeInputPath,
-                                    /*MakeAbsolute=*/false);
-    InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
-
-    ModifiedCommandLine = CommandLine;
-    ModifiedCommandLine->emplace_back(FakeInputPath);
-  }
-
-  const std::vector<std::string> &FinalCommandLine =
-      ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
-  auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
-
+    DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+    std::optional<StringRef> ModuleName) {
   auto FileMgr =
-      llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FinalFS);
+      llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FS);
 
-  std::vector<const char *> FinalCCommandLine(FinalCommandLine.size(), nullptr);
-  llvm::transform(FinalCommandLine, FinalCCommandLine.begin(),
+  std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
+  llvm::transform(CommandLine, CCommandLine.begin(),
                   [](const std::string &Str) { return Str.c_str(); });
-
-  auto DiagOpts = CreateAndPopulateDiagOpts(FinalCCommandLine);
+  auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
   sanitizeDiagOpts(*DiagOpts);
   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-      CompilerInstance::createDiagnostics(*FinalFS, DiagOpts.release(), &DC,
+      CompilerInstance::createDiagnostics(FileMgr->getVirtualFileSystem(),
+                                          DiagOpts.release(), &DC,
                                           /*ShouldOwnClient=*/false);
 
   // Although `Diagnostics` are used only for command-line parsing, the
@@ -667,12 +660,12 @@ bool DependencyScanningWorker::computeDependencies(
                                   DisableFree, ModuleName);
 
   bool Success = false;
-  if (FinalCommandLine[1] == "-cc1") {
-    Success = createAndRunToolInvocation(FinalCommandLine, Action, *FileMgr,
+  if (CommandLine[1] == "-cc1") {
+    Success = createAndRunToolInvocation(CommandLine, Action, *FileMgr,
                                          PCHContainerOps, *Diags, Consumer);
   } else {
     Success = forEachDriverJob(
-        FinalCommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
+        CommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
           if (StringRef(Cmd.getCreator().getName()) != "clang") {
             // Non-clang command. Just pass through to the dependency
             // consumer.
@@ -699,8 +692,77 @@ bool DependencyScanningWorker::computeDependencies(
 
   if (Success && !Action.hasScanned())
     Diags->Report(diag::err_fe_expected_compiler_job)
-        << llvm::join(FinalCommandLine, " ");
+        << llvm::join(CommandLine, " ");
   return Success && Action.hasScanned();
 }
 
+bool DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  std::optional<std::vector<std::string>> ModifiedCommandLine;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
+
+  // If the client provided the TU contents as a memory buffer, expose it via
+  // an in-memory overlay file system under the buffer identifier and append
+  // that path to the command line as the input file.
+  if (TUBuffer) {
+    auto OverlayFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+    auto InMemoryFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+    InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+    auto InputPath = TUBuffer->getBufferIdentifier();
+    InMemoryFS->addFile(
+        InputPath, 0,
+        llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer()));
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay =
+        InMemoryFS;
+
+    OverlayFS->pushOverlay(InMemoryOverlay);
+    ModifiedFS = OverlayFS;
+    ModifiedCommandLine = CommandLine;
+    ModifiedCommandLine->emplace_back(InputPath);
+  }
+
+  const std::vector<std::string> &FinalCommandLine =
+      ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
+  auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
+
+  return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
+                          Controller, DC, FinalFS, /*ModuleName=*/std::nullopt);
+}
+
+bool DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    DiagnosticConsumer &DC, StringRef ModuleName) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  // If we're scanning based on a module name alone, we don't expect the client
+  // to provide us with an input file. However, the driver really wants to have
+  // one. Let's just make it up to make the driver happy.
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  SmallString<128> FakeInputPath;
+  // TODO: We should retry the creation if the path already exists.
+  llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
+                                  /*MakeAbsolute=*/false);
+  InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+
+  OverlayFS->pushOverlay(InMemoryOverlay);
+  auto ModifiedCommandLine = CommandLine;
+  ModifiedCommandLine.emplace_back(FakeInputPath);
+
+  return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer,
+                          Controller, DC, OverlayFS, ModuleName);
+}
+
 DependencyActionController::~DependencyActionController() {}
diff --git a/clang/test/ClangScanDeps/tu-with-modules.c b/clang/test/ClangScanDeps/tu-with-modules.c
new file mode 100644
index 00000000000000..386c79ed6b4f68
--- /dev/null
+++ b/clang/test/ClangScanDeps/tu-with-modules.c
@@ -0,0 +1,111 @@
+// UNSUPPORTED: target=powerpc64-ibm-aix{{.*}}
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- module.modulemap
+module root { header "root.h" }
+module direct { header "direct.h" }
+module transitive { header "transitive.h" }
+module addition { header "addition.h" }
+//--- root.h
+#include "direct.h"
+#include "root/textual.h"
+//--- direct.h
+#include "transitive.h"
+//--- transitive.h
+// empty
+
+//--- addition.h
+// empty
+
+//--- tu.c
+#include "root.h"
+
+//--- root/textual.h
+// This is here to verify that the "root" directory doesn't clash with name of
+// the "root" module.
+
+//--- cdb.json.template
+[{
+  "file": "",
+  "directory": "DIR",
+  "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR -x c -c"
+}]
+
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full -tu-buffer-path %t/tu.c > %t/result.json
+// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s --check-prefix=CHECK
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "{{.*}}",
+// CHECK-NEXT:           "module-name": "transitive"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ],
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK-NEXT:         "[[PREFIX]]/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/direct.h"
+// CHECK-NEXT:       ],
+// CHECK:            "name": "direct"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "{{.*}}",
+// CHECK-NEXT:           "module-name": "direct"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ],
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK-NEXT:         "[[PREFIX]]/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/root.h"
+// CHECK-NEXT:         "[[PREFIX]]/root/textual.h"
+// CHECK-NEXT:       ],
+// CHECK:            "name": "root"
+// CHECK-NE...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/125111


More information about the cfe-commits mailing list