[clang] 6626f6f - [clang][deps] Override dependency and serialized diag files for modules

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 12 08:20:27 PDT 2022


Author: Ben Langmuir
Date: 2022-07-12T08:19:52-07:00
New Revision: 6626f6fec3d37b78b628b858bdadbbb8301e1a2f

URL: https://github.com/llvm/llvm-project/commit/6626f6fec3d37b78b628b858bdadbbb8301e1a2f
DIFF: https://github.com/llvm/llvm-project/commit/6626f6fec3d37b78b628b858bdadbbb8301e1a2f.diff

LOG: [clang][deps] Override dependency and serialized diag files for modules

When building modules, override secondary outputs (dependency file,
dependency targets, serialized diagnostic file) in addition to the pcm
file path. This avoids inheriting per-TU command-line options that
cause non-determinism in the results (non-deterministic command-line for
the module build, non-determinism in which TU's .diag and .d files will
contain the module outputs). In clang-scan-deps we infer whether to
generate dependency or serialized diagnostic files based on an original
command-line. In a real build system this should be modeled explicitly.

Differential Revision: https://reviews.llvm.org/D129389

Added: 
    clang/test/ClangScanDeps/generate-modules-path-args.c

Modified: 
    clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
    clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
    clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
    clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
    clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
    clang/test/ClangScanDeps/removed-args.c
    clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 
    clang/test/ClangScanDeps/Inputs/preserved-args/cdb.json.template
    clang/test/ClangScanDeps/Inputs/preserved-args/mod.h
    clang/test/ClangScanDeps/Inputs/preserved-args/module.modulemap
    clang/test/ClangScanDeps/Inputs/preserved-args/tu.c
    clang/test/ClangScanDeps/preserved-args.c


################################################################################
diff  --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index 3bb44e44187ba..a85d333ba6b18 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -47,12 +47,12 @@ struct FullDependencies {
 
   /// Get the full command line.
   ///
-  /// \param LookupPCMPath This function is called to fill in "-fmodule-file="
-  ///                      arguments and the "-o" argument. It needs to return
-  ///                      a path for where the PCM for the given module is to
-  ///                      be located.
-  std::vector<std::string>
-  getCommandLine(std::function<StringRef(ModuleID)> LookupPCMPath) const;
+  /// \param LookupModuleOutput This function is called to fill in
+  ///                           "-fmodule-file=", "-o" and other output
+  ///                           arguments for dependencies.
+  std::vector<std::string> getCommandLine(
+      llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
+          LookupOutput) const;
 
   /// Get the full command line, excluding -fmodule-file=" arguments.
   std::vector<std::string> getCommandLineWithoutModulePaths() const;

diff  --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index e0a4d6a554eb2..cb68df8314da1 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -65,6 +65,19 @@ struct ModuleIDHasher {
   }
 };
 
+/// An output from a module compilation, such as the path of the module file.
+enum class ModuleOutputKind {
+  /// The module file (.pcm). Required.
+  ModuleFile,
+  /// The path of the dependency file (.d), if any.
+  DependencyFile,
+  /// The null-separated list of names to use as the targets in the dependency
+  /// file, if any.
+  DependencyTargets,
+  /// The path of the serialized diagnostic file (.dia), if any.
+  DiagnosticSerializationFile,
+};
+
 struct ModuleDeps {
   /// The identifier of the module.
   ModuleID ID;
@@ -104,17 +117,25 @@ struct ModuleDeps {
   // the primary TU.
   bool ImportedByMainFile = false;
 
+  /// Whether the TU had a dependency file. The path in \c BuildInvocation is
+  /// cleared to avoid leaking the specific path from the TU into the module.
+  bool HadDependencyFile = false;
+
+  /// Whether the TU had serialized diagnostics. The path in \c BuildInvocation
+  /// is cleared to avoid leaking the specific path from the TU into the module.
+  bool HadSerializedDiagnostics = false;
+
   /// Compiler invocation that can be used to build this module (without paths).
   CompilerInvocation BuildInvocation;
 
   /// Gets the canonical command line suitable for passing to clang.
   ///
-  /// \param LookupPCMPath This function is called to fill in "-fmodule-file="
-  ///                      arguments and the "-o" argument. It needs to return
-  ///                      a path for where the PCM for the given module is to
-  ///                      be located.
+  /// \param LookupModuleOutput This function is called to fill in
+  ///                           "-fmodule-file=", "-o" and other output
+  ///                           arguments.
   std::vector<std::string> getCanonicalCommandLine(
-      std::function<StringRef(ModuleID)> LookupPCMPath) const;
+      llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
+          LookupModuleOutput) const;
 
   /// Gets the canonical command line suitable for passing to clang, excluding
   /// "-fmodule-file=" and "-o" arguments.

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
index 575aa46bff1b6..43127ea2df983 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
@@ -14,11 +14,14 @@ using namespace tooling;
 using namespace dependencies;
 
 std::vector<std::string> FullDependencies::getCommandLine(
-    std::function<StringRef(ModuleID)> LookupPCMPath) const {
+    llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
+        LookupModuleOutput) const {
   std::vector<std::string> Ret = getCommandLineWithoutModulePaths();
 
-  for (ModuleID MID : ClangModuleDeps)
-    Ret.push_back(("-fmodule-file=" + LookupPCMPath(MID)).str());
+  for (ModuleID MID : ClangModuleDeps) {
+    auto PCM = LookupModuleOutput(MID, ModuleOutputKind::ModuleFile);
+    Ret.push_back("-fmodule-file=" + PCM);
+  }
 
   return Ret;
 }

diff  --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index f7d96130b9712..2d4af501343b2 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -56,6 +56,9 @@ CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
   CI.getFrontendOpts().OutputFile.clear();
   CI.getCodeGenOpts().MainFileName.clear();
   CI.getCodeGenOpts().DwarfDebugFlags.clear();
+  CI.getDiagnosticOpts().DiagnosticSerializationFile.clear();
+  CI.getDependencyOutputOpts().OutputFile.clear();
+  CI.getDependencyOutputOpts().Targets.clear();
 
   CI.getFrontendOpts().ProgramAction = frontend::GenerateModule;
   CI.getLangOpts()->ModuleName = Deps.ID.ModuleName;
@@ -107,18 +110,40 @@ serializeCompilerInvocation(const CompilerInvocation &CI) {
   return std::vector<std::string>{Args.begin(), Args.end()};
 }
 
+static std::vector<std::string> splitString(std::string S, char Separator) {
+  SmallVector<StringRef> Segments;
+  StringRef(S).split(Segments, Separator);
+  std::vector<std::string> Result;
+  Result.reserve(Segments.size());
+  for (StringRef Segment : Segments)
+    Result.push_back(Segment.str());
+  return Result;
+}
+
 std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
-    std::function<StringRef(ModuleID)> LookupPCMPath) const {
+    llvm::function_ref<std::string(const ModuleID &, ModuleOutputKind)>
+        LookupModuleOutput) const {
   CompilerInvocation CI(BuildInvocation);
   FrontendOptions &FrontendOpts = CI.getFrontendOpts();
 
   InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
                                InputKind::Format::ModuleMap);
   FrontendOpts.Inputs.emplace_back(ClangModuleMapFile, ModuleMapInputKind);
-  FrontendOpts.OutputFile = std::string(LookupPCMPath(ID));
+  FrontendOpts.OutputFile =
+      LookupModuleOutput(ID, ModuleOutputKind::ModuleFile);
+  if (HadSerializedDiagnostics)
+    CI.getDiagnosticOpts().DiagnosticSerializationFile =
+        LookupModuleOutput(ID, ModuleOutputKind::DiagnosticSerializationFile);
+  if (HadDependencyFile) {
+    CI.getDependencyOutputOpts().OutputFile =
+        LookupModuleOutput(ID, ModuleOutputKind::DependencyFile);
+    CI.getDependencyOutputOpts().Targets = splitString(
+        LookupModuleOutput(ID, ModuleOutputKind::DependencyTargets), '\0');
+  }
 
   for (ModuleID MID : ClangModuleDeps)
-    FrontendOpts.ModuleFiles.emplace_back(LookupPCMPath(MID));
+    FrontendOpts.ModuleFiles.push_back(
+        LookupModuleOutput(MID, ModuleOutputKind::ModuleFile));
 
   return serializeCompilerInvocation(CI);
 }
@@ -309,6 +334,12 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
           optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
                                    *MDC.ScanInstance.getASTReader(), *MF);
       });
+  MD.HadSerializedDiagnostics = !MDC.OriginalInvocation.getDiagnosticOpts()
+                                     .DiagnosticSerializationFile.empty();
+  MD.HadDependencyFile =
+      !MDC.OriginalInvocation.getDependencyOutputOpts().OutputFile.empty();
+  // FIXME: HadSerializedDiagnostics and HadDependencyFile should be included in
+  // the context hash since it can affect the command-line.
   MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
 
   llvm::DenseSet<const Module *> AddedModules;

diff  --git a/clang/test/ClangScanDeps/Inputs/preserved-args/cdb.json.template b/clang/test/ClangScanDeps/Inputs/preserved-args/cdb.json.template
deleted file mode 100644
index 331b0d5008b22..0000000000000
--- a/clang/test/ClangScanDeps/Inputs/preserved-args/cdb.json.template
+++ /dev/null
@@ -1,7 +0,0 @@
-[
-  {
-    "directory": "DIR",
-    "command": "clang -MD -MT my_target -serialize-diagnostics DIR/tu.dia -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -fmodule-file=Foo=DIR/foo.pcm -o DIR/tu.o",
-    "file": "DIR/tu.c"
-  }
-]

diff  --git a/clang/test/ClangScanDeps/Inputs/preserved-args/mod.h b/clang/test/ClangScanDeps/Inputs/preserved-args/mod.h
deleted file mode 100644
index 51e37a532a3a6..0000000000000
--- a/clang/test/ClangScanDeps/Inputs/preserved-args/mod.h
+++ /dev/null
@@ -1 +0,0 @@
-// mod.h

diff  --git a/clang/test/ClangScanDeps/Inputs/preserved-args/module.modulemap b/clang/test/ClangScanDeps/Inputs/preserved-args/module.modulemap
deleted file mode 100644
index 99e4ee8f6363c..0000000000000
--- a/clang/test/ClangScanDeps/Inputs/preserved-args/module.modulemap
+++ /dev/null
@@ -1,3 +0,0 @@
-module Mod {
-  header "mod.h"
-}

diff  --git a/clang/test/ClangScanDeps/Inputs/preserved-args/tu.c b/clang/test/ClangScanDeps/Inputs/preserved-args/tu.c
deleted file mode 100644
index 01f145835c765..0000000000000
--- a/clang/test/ClangScanDeps/Inputs/preserved-args/tu.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "mod.h"

diff  --git a/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template b/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
index f92e7884e1786..95d1f99955bb0 100644
--- a/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
+++ b/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
@@ -1,7 +1,7 @@
 [
   {
     "directory": "DIR",
-    "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-validate-once-per-build-session -fbuild-session-file=DIR/build-session -fmodules-prune-interval=123 -fmodules-prune-after=123 -fmodules-cache-path=DIR/cache -include DIR/header.h -grecord-command-line -o DIR/tu.o",
+    "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-validate-once-per-build-session -fbuild-session-file=DIR/build-session -fmodules-prune-interval=123 -fmodules-prune-after=123 -fmodules-cache-path=DIR/cache -include DIR/header.h -grecord-command-line -o DIR/tu.o -serialize-diagnostics DIR/tu.diag -MT tu -MD -MF DIR/tu.d",
     "file": "DIR/tu.c"
   }
 ]

diff  --git a/clang/test/ClangScanDeps/generate-modules-path-args.c b/clang/test/ClangScanDeps/generate-modules-path-args.c
new file mode 100644
index 0000000000000..79f94ba82e189
--- /dev/null
+++ b/clang/test/ClangScanDeps/generate-modules-path-args.c
@@ -0,0 +1,52 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+// RUN: sed "s|DIR|%/t|g" %t/cdb_without.json.template > %t/cdb_without.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json \
+// RUN:   -format experimental-full -generate-modules-path-args > %t/deps.json
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+// RUN: clang-scan-deps -compilation-database %t/cdb_without.json \
+// RUN:   -format experimental-full -generate-modules-path-args > %t/deps_without.json
+// RUN: cat %t/deps_without.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t -check-prefix=WITHOUT %s
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:     {
+// CHECK:            "command-line": [
+// CHECK-NEXT:         "-cc1"
+// CHECK:              "-serialize-diagnostic-file"
+// CHECK-NEXT:         "[[PREFIX]]{{.*}}Mod{{.*}}.diag"
+// CHECK:              "-dependency-file"
+// CHECK-NEXT:         "[[PREFIX]]{{.*}}Mod{{.*}}.d"
+// CHECK:            ],
+
+// WITHOUT:      {
+// WITHOUT-NEXT:   "modules": [
+// WITHOUT-NEXT:     {
+// WITHOUT:            "command-line": [
+// WITHOUT-NEXT:         "-cc1"
+// WITHOUT-NOT:          "-serialize-diagnostic-file"
+// WITHOUT-NOT:          "-dependency-file"
+// WITHOUT:            ],
+
+//--- cdb.json.template
+[{
+  "directory": "DIR",
+  "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -serialize-diagnostics DIR/tu.diag -MD -MT tu -MF DIR/tu.d",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb_without.json.template
+[{
+  "directory": "DIR",
+  "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+  "file": "DIR/tu.c"
+}]
+
+//--- module.modulemap
+module Mod { header "Mod.h" }
+
+//--- Mod.h
+
+//--- tu.c
+#include "Mod.h"

diff  --git a/clang/test/ClangScanDeps/preserved-args.c b/clang/test/ClangScanDeps/preserved-args.c
deleted file mode 100644
index a1f0a5c40ec32..0000000000000
--- a/clang/test/ClangScanDeps/preserved-args.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// RUN: rm -rf %t && mkdir %t
-// RUN: cp -r %S/Inputs/preserved-args/* %t
-// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
-
-// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json
-// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
-
-// CHECK:      {
-// CHECK-NEXT:   "modules": [
-// CHECK-NEXT:     {
-// CHECK:            "command-line": [
-// CHECK-NEXT:         "-cc1"
-// CHECK:              "-serialize-diagnostic-file"
-// CHECK-NEXT:         "[[PREFIX]]/tu.dia"
-// CHECK:              "-fmodule-file=Foo=[[PREFIX]]/foo.pcm"
-// CHECK:              "-MT"
-// CHECK-NEXT:         "my_target"
-// CHECK:              "-dependency-file"
-// CHECK-NEXT:         "[[PREFIX]]/tu.d"
-// CHECK:            ],
-// CHECK:            "name": "Mod"
-// CHECK-NEXT:     }
-// CHECK-NEXT:   ]
-// CHECK:      }

diff  --git a/clang/test/ClangScanDeps/removed-args.c b/clang/test/ClangScanDeps/removed-args.c
index 8c07ca8d1f7e9..8be7c31578b98 100644
--- a/clang/test/ClangScanDeps/removed-args.c
+++ b/clang/test/ClangScanDeps/removed-args.c
@@ -29,6 +29,9 @@
 // CHECK-NOT:          "-fbuild-session-timestamp=
 // CHECK-NOT:          "-fmodules-prune-interval=
 // CHECK-NOT:          "-fmodules-prune-after=
+// CHECK-NOT:          "-dependency-file"
+// CHECK-NOT:          "-MT"
+// CHECK-NOT:          "-serialize-diagnostic-file"
 // CHECK:            ],
 // CHECK-NEXT:       "context-hash": "[[HASH_MOD_HEADER:.*]]",
 // CHECK-NEXT:       "file-deps": [
@@ -50,6 +53,9 @@
 // CHECK-NOT:          "-fbuild-session-timestamp=
 // CHECK-NOT:          "-fmodules-prune-interval=
 // CHECK-NOT:          "-fmodules-prune-after=
+// CHECK-NOT:          "-dependency-file"
+// CHECK-NOT:          "-MT"
+// CHECK-NOT:          "-serialize-diagnostic-file"
 // CHECK:            ],
 // CHECK-NEXT:       "context-hash": "[[HASH_MOD_TU:.*]]",
 // CHECK-NEXT:       "file-deps": [

diff  --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index c37d098a70672..467d7d5c3644b 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -288,11 +288,12 @@ class FullDeps {
       Modules.insert(I, {{MD.ID, InputIndex}, std::move(MD)});
     }
 
-    ID.CommandLine = GenerateModulesPathArgs
-                         ? FD.getCommandLine(
-                               [&](ModuleID MID) { return lookupPCMPath(MID); })
-                         : FD.getCommandLineWithoutModulePaths();
-
+    ID.CommandLine =
+        GenerateModulesPathArgs
+            ? FD.getCommandLine([&](const ModuleID &MID, ModuleOutputKind MOK) {
+                return lookupModuleOutput(MID, MOK);
+              })
+            : FD.getCommandLineWithoutModulePaths();
     Inputs.push_back(std::move(ID));
   }
 
@@ -325,7 +326,9 @@ class FullDeps {
           {"command-line",
            GenerateModulesPathArgs
                ? MD.getCanonicalCommandLine(
-                     [&](ModuleID MID) { return lookupPCMPath(MID); })
+                     [&](const ModuleID &MID, ModuleOutputKind MOK) {
+                       return lookupModuleOutput(MID, MOK);
+                     })
                : MD.getCanonicalCommandLineWithoutModulePaths()},
       };
       OutModules.push_back(std::move(O));
@@ -352,11 +355,22 @@ class FullDeps {
   }
 
 private:
-  StringRef lookupPCMPath(ModuleID MID) {
+  std::string lookupModuleOutput(const ModuleID &MID, ModuleOutputKind MOK) {
+    // Cache the PCM path, since it will be queried repeatedly for each module.
+    // The other outputs are only queried once during getCanonicalCommandLine.
     auto PCMPath = PCMPaths.insert({MID, ""});
     if (PCMPath.second)
       PCMPath.first->second = constructPCMPath(MID);
-    return PCMPath.first->second;
+    switch (MOK) {
+    case ModuleOutputKind::ModuleFile:
+      return PCMPath.first->second;
+    case ModuleOutputKind::DependencyFile:
+      return PCMPath.first->second + ".d";
+    case ModuleOutputKind::DependencyTargets:
+      return ""; // Will get the default target name.
+    case ModuleOutputKind::DiagnosticSerializationFile:
+      return PCMPath.first->second + ".diag";
+    }
   }
 
   /// Construct a path for the explicitly built PCM.


        


More information about the cfe-commits mailing list