[clang] cf73d3f - [clang][deps] Ensure module invocation can be serialized

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 8 14:23:53 PST 2023


Author: Ben Langmuir
Date: 2023-02-08T14:23:39-08:00
New Revision: cf73d3f07b5b0ff83a852dfdf8857500e86f9952

URL: https://github.com/llvm/llvm-project/commit/cf73d3f07b5b0ff83a852dfdf8857500e86f9952
DIFF: https://github.com/llvm/llvm-project/commit/cf73d3f07b5b0ff83a852dfdf8857500e86f9952.diff

LOG: [clang][deps] Ensure module invocation can be serialized

When reseting modular options, propagate the values from certain options
that have ImpliedBy relations instead of setting to the default. Also,
verify in clang-scan-deps that the command line produced round trips
exactly.

Ideally we would automatically derive the set of options that need this
kind of propagation, but for now there aren't very many impacted.

rdar://105148590

Differential Revision: https://reviews.llvm.org/D143446

Added: 
    clang/test/ClangScanDeps/modules-implied-args.c

Modified: 
    clang/include/clang/Frontend/CompilerInvocation.h
    clang/lib/Basic/LangOptions.cpp
    clang/lib/Frontend/CompilerInvocation.cpp
    clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
index 254f048ed3c7e..1dbd1eda62b3f 100644
--- a/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/clang/include/clang/Frontend/CompilerInvocation.h
@@ -241,6 +241,17 @@ class CompilerInvocation : public CompilerInvocationRefBase,
   /// This is a (less-efficient) wrapper over generateCC1CommandLine().
   std::vector<std::string> getCC1CommandLine() const;
 
+  /// Check that \p Args can be parsed and re-serialized without change,
+  /// emiting diagnostics for any 
diff erences.
+  ///
+  /// This check is only suitable for command-lines that are expected to already
+  /// be canonical.
+  ///
+  /// \return false if there are any errors.
+  static bool checkCC1RoundTrip(ArrayRef<const char *> Args,
+                                DiagnosticsEngine &Diags,
+                                const char *Argv0 = nullptr);
+
   /// Reset all of the options that are not considered when building a
   /// module.
   void resetNonModularOptions();

diff  --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index 753b6bfe18a37..f22fe9a82bda4 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -29,6 +29,14 @@ void LangOptions::resetNonModularOptions() {
   Name = static_cast<unsigned>(Default);
 #include "clang/Basic/LangOptions.def"
 
+  // Reset "benign" options with implied values (Options.td ImpliedBy relations)
+  // rather than their defaults. This avoids unexpected combinations and
+  // invocations that cannot be round-tripped to arguments.
+  // FIXME: we should derive this automatically from ImpliedBy in tablegen.
+  AllowFPReassoc = UnsafeFPMath;
+  NoHonorNaNs = FiniteMathOnly;
+  NoHonorInfs = FiniteMathOnly;
+
   // These options do not affect AST generation.
   NoSanitizeFiles.clear();
   XRayAlwaysInstrumentFiles.clear();

diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index f456d47ed1009..4bf77ec4d0782 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -641,18 +641,31 @@ using GenerateFn = llvm::function_ref<void(
     CompilerInvocation &, SmallVectorImpl<const char *> &,
     CompilerInvocation::StringAllocator)>;
 
-// May perform round-trip of command line arguments. By default, the round-trip
-// is enabled in assert builds. This can be overwritten at run-time via the
-// "-round-trip-args" and "-no-round-trip-args" command line flags.
-// During round-trip, the command line arguments are parsed into a dummy
-// instance of CompilerInvocation which is used to generate the command line
-// arguments again. The real CompilerInvocation instance is then created by
-// parsing the generated arguments, not the original ones.
+/// May perform round-trip of command line arguments. By default, the round-trip
+/// is enabled in assert builds. This can be overwritten at run-time via the
+/// "-round-trip-args" and "-no-round-trip-args" command line flags, or via the
+/// ForceRoundTrip parameter.
+///
+/// During round-trip, the command line arguments are parsed into a dummy
+/// CompilerInvocation, which is used to generate the command line arguments
+/// again. The real CompilerInvocation is then created by parsing the generated
+/// arguments, not the original ones. This (in combination with tests covering
+/// argument behavior) ensures the generated command line is complete (doesn't
+/// drop/mangle any arguments).
+///
+/// Finally, we check the command line that was used to create the real
+/// CompilerInvocation instance. By default, we compare it to the command line
+/// the real CompilerInvocation generates. This checks whether the generator is
+/// deterministic. If \p CheckAgainstOriginalInvocation is enabled, we instead
+/// compare it to the original command line to verify the original command-line
+/// was canonical and can round-trip exactly.
 static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
                       CompilerInvocation &RealInvocation,
                       CompilerInvocation &DummyInvocation,
                       ArrayRef<const char *> CommandLineArgs,
-                      DiagnosticsEngine &Diags, const char *Argv0) {
+                      DiagnosticsEngine &Diags, const char *Argv0,
+                      bool CheckAgainstOriginalInvocation = false,
+                      bool ForceRoundTrip = false) {
 #ifndef NDEBUG
   bool DoRoundTripDefault = true;
 #else
@@ -660,11 +673,15 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
 #endif
 
   bool DoRoundTrip = DoRoundTripDefault;
-  for (const auto *Arg : CommandLineArgs) {
-    if (Arg == StringRef("-round-trip-args"))
-      DoRoundTrip = true;
-    if (Arg == StringRef("-no-round-trip-args"))
-      DoRoundTrip = false;
+  if (ForceRoundTrip) {
+    DoRoundTrip = true;
+  } else {
+    for (const auto *Arg : CommandLineArgs) {
+      if (Arg == StringRef("-round-trip-args"))
+        DoRoundTrip = true;
+      if (Arg == StringRef("-no-round-trip-args"))
+        DoRoundTrip = false;
+    }
   }
 
   // If round-trip was not requested, simply run the parser with the real
@@ -719,30 +736,34 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
   // Generate arguments from the dummy invocation. If Generate is the
   // inverse of Parse, the newly generated arguments must have the same
   // semantics as the original.
-  SmallVector<const char *> GeneratedArgs1;
-  Generate(DummyInvocation, GeneratedArgs1, SA);
+  SmallVector<const char *> GeneratedArgs;
+  Generate(DummyInvocation, GeneratedArgs, SA);
 
   // Run the second parse, now on the generated arguments, and with the real
   // invocation and diagnostics. The result is what we will end up using for the
   // rest of compilation, so if Generate is not inverse of Parse, something down
   // the line will break.
-  bool Success2 = Parse(RealInvocation, GeneratedArgs1, Diags, Argv0);
+  bool Success2 = Parse(RealInvocation, GeneratedArgs, Diags, Argv0);
 
   // The first parse on original arguments succeeded, but second parse of
   // generated arguments failed. Something must be wrong with the generator.
   if (!Success2) {
     Diags.Report(diag::err_cc1_round_trip_ok_then_fail);
     Diags.Report(diag::note_cc1_round_trip_generated)
-        << 1 << SerializeArgs(GeneratedArgs1);
+        << 1 << SerializeArgs(GeneratedArgs);
     return false;
   }
 
-  // Generate arguments again, this time from the options we will end up using
-  // for the rest of the compilation.
-  SmallVector<const char *> GeneratedArgs2;
-  Generate(RealInvocation, GeneratedArgs2, SA);
+  SmallVector<const char *> ComparisonArgs;
+  if (CheckAgainstOriginalInvocation)
+    // Compare against original arguments.
+    ComparisonArgs.assign(CommandLineArgs.begin(), CommandLineArgs.end());
+  else
+    // Generate arguments again, this time from the options we will end up using
+    // for the rest of the compilation.
+    Generate(RealInvocation, ComparisonArgs, SA);
 
-  // Compares two lists of generated arguments.
+  // Compares two lists of arguments.
   auto Equal = [](const ArrayRef<const char *> A,
                   const ArrayRef<const char *> B) {
     return std::equal(A.begin(), A.end(), B.begin(), B.end(),
@@ -754,23 +775,41 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
   // If we generated 
diff erent arguments from what we assume are two
   // semantically equivalent CompilerInvocations, the Generate function may
   // be non-deterministic.
-  if (!Equal(GeneratedArgs1, GeneratedArgs2)) {
+  if (!Equal(GeneratedArgs, ComparisonArgs)) {
     Diags.Report(diag::err_cc1_round_trip_mismatch);
     Diags.Report(diag::note_cc1_round_trip_generated)
-        << 1 << SerializeArgs(GeneratedArgs1);
+        << 1 << SerializeArgs(GeneratedArgs);
     Diags.Report(diag::note_cc1_round_trip_generated)
-        << 2 << SerializeArgs(GeneratedArgs2);
+        << 2 << SerializeArgs(ComparisonArgs);
     return false;
   }
 
   Diags.Report(diag::remark_cc1_round_trip_generated)
-      << 1 << SerializeArgs(GeneratedArgs1);
+      << 1 << SerializeArgs(GeneratedArgs);
   Diags.Report(diag::remark_cc1_round_trip_generated)
-      << 2 << SerializeArgs(GeneratedArgs2);
+      << 2 << SerializeArgs(ComparisonArgs);
 
   return Success2;
 }
 
+bool CompilerInvocation::checkCC1RoundTrip(ArrayRef<const char *> Args,
+                                           DiagnosticsEngine &Diags,
+                                           const char *Argv0) {
+  CompilerInvocation DummyInvocation1, DummyInvocation2;
+  return RoundTrip(
+      [](CompilerInvocation &Invocation, ArrayRef<const char *> CommandLineArgs,
+         DiagnosticsEngine &Diags, const char *Argv0) {
+        return CreateFromArgsImpl(Invocation, CommandLineArgs, Diags, Argv0);
+      },
+      [](CompilerInvocation &Invocation, SmallVectorImpl<const char *> &Args,
+         StringAllocator SA) {
+        Args.push_back("-cc1");
+        Invocation.generateCC1CommandLine(Args, SA);
+      },
+      DummyInvocation1, DummyInvocation2, Args, Diags, Argv0,
+      /*CheckAgainstOriginalInvocation=*/true, /*ForceRoundTrip=*/true);
+}
+
 static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
                               OptSpecifier GroupWithValue,
                               std::vector<std::string> &Diagnostics) {

diff  --git a/clang/test/ClangScanDeps/modules-implied-args.c b/clang/test/ClangScanDeps/modules-implied-args.c
new file mode 100644
index 0000000000000..515cc4463e6a6
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-implied-args.c
@@ -0,0 +1,46 @@
+// Check that we get canonical round-trippable command-lines, in particular
+// for the options modified for modules.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full -round-trip-args > %t/result.json
+// RUN: cat %t/result.json  | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t --check-prefixes=CHECK,NEGATIVE
+
+// -ffast-math implies -menable-no-infs, -menable-no-nans, and -mreassociate;
+// those options are modified by resetNonModularOptions.
+
+// NEGATIVE-NOT: "-menable-no-infs"
+// NEGATIVE-NOT: "-menable-no-nans"
+// NEGATIVE-NOT: "-mreassociate"
+
+// CHECK:      "modules": [
+// CHECK-NEXT:   {
+// CHECK:          "clang-module-deps": []
+// CHECK:          "command-line": [
+// CHECK:            "-ffast-math"
+// CHECK:          ]
+// CHECK:          "name": "Mod"
+// CHECK:        }
+// CHECK-NEXT: ]
+// CHECK:      "translation-units": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:     "commands": [
+// CHECK:            {
+// CHECK:              "command-line": [
+// CHECK:                "-ffast-math"
+// CHECK:              ]
+
+//--- cdb.json.template
+[{
+  "file": "DIR/tu.c",
+  "directory": "DIR",
+  "command": "clang -fsyntax-only DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -ffast-math"
+}]
+
+//--- module.modulemap
+module Mod { header "mod.h" }
+//--- mod.h
+//--- tu.c
+#include "mod.h"

diff  --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 1458d11eb323f..53879a4064f0e 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Driver/Driver.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
@@ -197,6 +198,19 @@ static llvm::cl::opt<ResourceDirRecipeKind> ResourceDirRecipe(
     llvm::cl::init(RDRK_ModifyCompilerPath),
     llvm::cl::cat(DependencyScannerCategory));
 
+#ifndef NDEBUG
+static constexpr bool DoRoundTripDefault = true;
+#else
+static constexpr bool DoRoundTripDefault = false;
+#endif
+
+llvm::cl::opt<bool>
+    RoundTripArgs("round-trip-args", llvm::cl::Optional,
+                  llvm::cl::desc("verify that command-line arguments are "
+                                 "canonical by parsing and re-serializing"),
+                  llvm::cl::init(DoRoundTripDefault),
+                  llvm::cl::cat(DependencyScannerCategory));
+
 llvm::cl::opt<bool> Verbose("v", llvm::cl::Optional,
                             llvm::cl::desc("Use verbose output."),
                             llvm::cl::init(false),
@@ -278,6 +292,37 @@ class FullDeps {
     }
   }
 
+  bool roundTripCommand(ArrayRef<std::string> ArgStrs,
+                        DiagnosticsEngine &Diags) {
+    if (ArgStrs.empty() || ArgStrs[0] != "-cc1")
+      return false;
+    SmallVector<const char *> Args;
+    for (const std::string &Arg : ArgStrs)
+      Args.push_back(Arg.c_str());
+    return !CompilerInvocation::checkCC1RoundTrip(Args, Diags);
+  }
+
+  // Returns \c true if any command lines fail to round-trip. We expect
+  // commands already be canonical when output by the scanner.
+  bool roundTripCommands(raw_ostream &ErrOS) {
+    IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions{};
+    TextDiagnosticPrinter DiagConsumer(ErrOS, &*DiagOpts);
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+        CompilerInstance::createDiagnostics(&*DiagOpts, &DiagConsumer,
+                                            /*ShouldOwnClient=*/false);
+
+    for (auto &&M : Modules)
+      if (roundTripCommand(M.second.BuildArguments, *Diags))
+        return true;
+
+    for (auto &&I : Inputs)
+      for (const auto &Cmd : I.Commands)
+        if (roundTripCommand(Cmd.Arguments, *Diags))
+          return true;
+
+    return false;
+  }
+
   void printFullOutput(raw_ostream &OS) {
     // Sort the modules by name to get a deterministic order.
     std::vector<IndexedModuleID> ModuleIDs;
@@ -615,6 +660,10 @@ int main(int argc, const char **argv) {
   }
   Pool.wait();
 
+  if (RoundTripArgs)
+    if (FD.roundTripCommands(llvm::errs()))
+      HadErrors = true;
+
   if (Format == ScanningOutputFormat::Full)
     FD.printFullOutput(llvm::outs());
 


        


More information about the cfe-commits mailing list