[clang] reland: [clang][ScanDeps] Canonicalize -D and -U flags (PR #82568)
Michael Spencer via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 21 23:21:05 PST 2024
https://github.com/Bigcheese updated https://github.com/llvm/llvm-project/pull/82568
>From d8bfbdeedbf0a3bdd2db25e7dd389d6f223091a3 Mon Sep 17 00:00:00 2001
From: Michael Spencer <bigcheesegs at gmail.com>
Date: Fri, 16 Feb 2024 22:05:25 -0800
Subject: [PATCH] [clang][ScanDeps] Canonicalize -D and -U flags
Canonicalize `-D` and `-U` flags by sorting them and only keeping the
last instance of a given name.
This optimization will only fire if all `-D` and `-U` flags start with
a simple identifier that we can guarantee a simple analysis of can
determine if two flags refer to the same identifier or not. See the
comment on `getSimpleMacroName()` for details of what the issues are.
---
.../DependencyScanningService.h | 5 +-
.../DependencyScanningWorker.cpp | 74 ++++++++++++++++
.../optimize-canonicalize-macros.m | 87 +++++++++++++++++++
clang/tools/clang-scan-deps/ClangScanDeps.cpp | 1 +
4 files changed, 166 insertions(+), 1 deletion(-)
create mode 100644 clang/test/ClangScanDeps/optimize-canonicalize-macros.m
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 4f9867262a275c..557f0e547ab4a8 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -60,7 +60,10 @@ enum class ScanningOptimizations {
/// Remove unused -ivfsoverlay arguments.
VFS = 4,
- DSS_LAST_BITMASK_ENUM(VFS),
+ /// Canonicalize -D and -U options.
+ Macros = 8,
+
+ DSS_LAST_BITMASK_ENUM(Macros),
Default = All
};
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 3cf3ad8a4e4907..7477b930188b4f 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -179,6 +179,78 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
DiagOpts.IgnoreWarnings = true;
}
+// Clang implements -D and -U by splatting text into a predefines buffer. This
+// allows constructs such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2”` to be accepted and
+// define the same macro, or adding C++ style comments before the macro name.
+//
+// This function checks that the first non-space characters in the macro
+// obviously form an identifier that can be uniqued on without lexing. Failing
+// to do this could lead to changing the final definition of a macro.
+//
+// We could set up a preprocessor and actually lex the name, but that's very
+// heavyweight for a situation that will almost never happen in practice.
+static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
+ StringRef Name = Macro.split("=").first.ltrim(" \t");
+ std::size_t I = 0;
+
+ auto FinishName = [&]() -> std::optional<StringRef> {
+ StringRef SimpleName = Name.slice(0, I);
+ if (SimpleName.empty())
+ return std::nullopt;
+ return SimpleName;
+ };
+
+ for (; I != Name.size(); ++I) {
+ switch (Name[I]) {
+ case '(': // Start of macro parameter list
+ case ' ': // End of macro name
+ case '\t':
+ return FinishName();
+ case '_':
+ continue;
+ default:
+ if (llvm::isAlnum(Name[I]))
+ continue;
+ return std::nullopt;
+ }
+ }
+ return FinishName();
+}
+
+static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
+ using MacroOpt = std::pair<StringRef, std::size_t>;
+ std::vector<MacroOpt> SimpleNames;
+ SimpleNames.reserve(PPOpts.Macros.size());
+ std::size_t Index = 0;
+ for (const auto &M : PPOpts.Macros) {
+ auto SName = getSimpleMacroName(M.first);
+ // Skip optimizing if we can't guarantee we can preserve relative order.
+ if (!SName)
+ return;
+ SimpleNames.emplace_back(*SName, Index);
+ ++Index;
+ }
+
+ llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
+ return A.first < B.first;
+ });
+ // Keep the last instance of each macro name by going in reverse
+ auto NewEnd = std::unique(
+ SimpleNames.rbegin(), SimpleNames.rend(),
+ [](const MacroOpt &A, const MacroOpt &B) { return A.first == B.first; });
+ SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
+
+ // Apply permutation.
+ decltype(PPOpts.Macros) NewMacros;
+ NewMacros.reserve(SimpleNames.size());
+ for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
+ std::size_t OriginalIndex = SimpleNames[I].second;
+ // We still emit undefines here as they may be undefining a predefined macro
+ NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
+ }
+ std::swap(PPOpts.Macros, NewMacros);
+}
+
/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
class DependencyScanningAction : public tooling::ToolAction {
@@ -203,6 +275,8 @@ class DependencyScanningAction : public tooling::ToolAction {
CompilerInvocation OriginalInvocation(*Invocation);
// Restore the value of DisableFree, which may be modified by Tooling.
OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
+ if (any(OptimizeArgs & ScanningOptimizations::Macros))
+ canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
if (Scanned) {
// Scanning runs once for the first -cc1 invocation in a chain of driver
diff --git a/clang/test/ClangScanDeps/optimize-canonicalize-macros.m b/clang/test/ClangScanDeps/optimize-canonicalize-macros.m
new file mode 100644
index 00000000000000..f7d3fbf8da0cbf
--- /dev/null
+++ b/clang/test/ClangScanDeps/optimize-canonicalize-macros.m
@@ -0,0 +1,87 @@
+// This test verifies that command lines with equivalent -D and -U arguments
+// are canonicalized to the same module variant.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
+// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
+// RUN: -j 1 -format experimental-full -optimize-args=canonicalize-macros > %t/deps.db
+// RUN: cat %t/deps.db | FileCheck %s -DPREFIX=%/t
+
+// Verify that there are only two variants and that the expected merges have
+// happened.
+
+// CHECK: {
+// CHECK-NEXT: "modules": [
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-module-deps": [],
+// CHECK-NEXT: "clang-modulemap-file":
+// CHECK-NEXT: "command-line": [
+// CHECK-NOT: "J=1"
+// CHECK-NOT: "J"
+// CHECK-NOT: "K"
+// CHECK: ],
+// CHECK-NEXT: "context-hash": "{{.*}}",
+// CHECK-NEXT: "file-deps": [
+// CHECK: ],
+// CHECK-NEXT: "name": "A"
+// CHECK-NEXT: },
+// CHECK-NEXT: {
+// CHECK-NEXT: "clang-module-deps": [],
+// CHECK-NEXT: "clang-modulemap-file":
+// CHECK-NEXT: "command-line": [
+// CHECK: "Fඞ"
+// CHECK: "F\\u{0D9E}"
+// CHECK: "K"
+// CHECK: "K"
+// CHECK: ],
+// CHECK-NEXT: "context-hash": "{{.*}}",
+// CHECK-NEXT: "file-deps": [
+// CHECK: ],
+// CHECK-NEXT: "name": "A"
+// CHECK-NEXT: }
+// CHECK-NEXT: ],
+// CHECK-NEXT: "translation-units": [
+// CHECK: ]
+// CHECK: }
+
+
+//--- build/compile-commands.json.in
+
+[
+{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu0.m -DJ=1 -UJ -DJ=2 -DI -DK(x)=x -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+ "file": "DIR/tu0.m"
+},
+{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu1.m -DK -DK(x)=x -DI -D \"J=2\" -I modules/A -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+ "file": "DIR/tu1.m"
+},
+{
+ "directory": "DIR",
+ "command": "clang -c DIR/tu2.m -I modules/A -DFඞ -DF\\u{0D9E} -DK -DK -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+ "file": "DIR/tu2.m"
+}
+]
+
+//--- modules/A/module.modulemap
+
+module A {
+ umbrella header "A.h"
+}
+
+//--- modules/A/A.h
+
+//--- tu0.m
+
+#include <A.h>
+
+//--- tu1.m
+
+#include <A.h>
+
+//--- tu2.m
+
+#include <A.h>
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0458a4b3ecec39..9811d2a8753359 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) {
.Case("header-search", ScanningOptimizations::HeaderSearch)
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
.Case("vfs", ScanningOptimizations::VFS)
+ .Case("canonicalize-macros", ScanningOptimizations::Macros)
.Case("all", ScanningOptimizations::All)
.Default(std::nullopt);
if (!Optimization) {
More information about the cfe-commits
mailing list