[clang] [clang][ScanDeps] Canonicalize -D and -U flags (PR #82298)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 20 08:20:28 PST 2024


================
@@ -179,6 +179,73 @@ static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
   DiagOpts.IgnoreWarnings = true;
 }
 
+// -D and -U are implemented in Clang by splatting their text into a predefines
+// buffer. As a consequence, spellings such as `-DFඞ=3 "-D F\u{0D9E} 4 3 2"`
+// are accepted and define the same macro, and C++ style comments may even be
+// placed before the macro name.
+//
+// This helper verifies that the first non-space characters of the macro
+// obviously form an identifier that can be uniqued on without lexing. It
+// returns that identifier on success and std::nullopt otherwise. Uniquing on
+// anything less certain could change the final definition of a macro.
+//
+// Setting up a preprocessor to actually lex the name would also work, but that
+// is very heavyweight for a situation that will almost never happen in
+// practice.
+static std::optional<StringRef> getSimpleMacroName(StringRef Macro) {
+  // Only the text before the first '=' can hold the name; drop surrounding
+  // blanks so the scan starts on the first meaningful character.
+  const StringRef Candidate = Macro.split("=").first.trim(" \t");
+
+  // Length of the leading run of [A-Za-z0-9_] characters.
+  std::size_t Length = 0;
+  while (Length != Candidate.size()) {
+    const char C = Candidate[Length];
+    // '(' starts a macro parameter list; a blank ends the macro name.
+    if (C == '(' || C == ' ' || C == '\t')
+      break;
+    // Anything else that is not an identifier character means the name
+    // cannot be determined without lexing.
+    if (C != '_' && !llvm::isAlnum(C))
+      return std::nullopt;
+    ++Length;
+  }
+
+  // No identifier characters before the terminator (or empty input).
+  if (Length == 0)
+    return std::nullopt;
+  return Candidate.slice(0, Length);
+}
+
+static void canonicalizeDefines(PreprocessorOptions &PPOpts) {
+  using MacroOpt = std::pair<StringRef, std::size_t>;
+  std::vector<MacroOpt> SimpleNames;
+  SimpleNames.reserve(PPOpts.Macros.size());
+  std::size_t Index = 0;
+  for (const auto &M : PPOpts.Macros) {
+    auto SName = getSimpleMacroName(M.first);
+    // Skip optimizing if we can't guarantee we can preserve relative order.
+    if (!SName)
+      return;
+    SimpleNames.emplace_back(*SName, Index);
+    ++Index;
+  }
+
+  llvm::stable_sort(SimpleNames, [](const MacroOpt &A, const MacroOpt &B) {
+    return A.first < B.first;
+  });
+  // Keep the last instance of each macro name by going in reverse
+  auto NewEnd = std::unique(SimpleNames.rbegin(), SimpleNames.rend(), [](const MacroOpt &A, const MacroOpt &B) {
+    return A.first == B.first;
+  });
+  SimpleNames.erase(SimpleNames.begin(), NewEnd.base());
+
+  // Apply permutation.
+  decltype(PPOpts.Macros) NewMacros;
+  NewMacros.reserve(SimpleNames.size());
+  for (std::size_t I = 0, E = SimpleNames.size(); I != E; ++I) {
+    std::size_t OriginalIndex = SimpleNames[I].second;
+    NewMacros.push_back(std::move(PPOpts.Macros[OriginalIndex]));
----------------
jansvoboda11 wrote:

Makes sense. Can you put that in a comment?

https://github.com/llvm/llvm-project/pull/82298


More information about the cfe-commits mailing list