[clang] [flang] [flang][Frontend] Implement printing defined macros via -dM (PR #87627)
Krzysztof Parzyszek via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 4 12:44:12 PDT 2024
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/87627
>From f4917dcf99664442d262226cd1ce1058646d7a55 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 3 Apr 2024 17:09:24 -0500
Subject: [PATCH 1/6] [flang][Frontend] Implement printing defined macros via
-dM
This should work the same way as in clang.
---
clang/include/clang/Driver/Options.td | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 5 +-
.../flang/Frontend/PreprocessorOptions.h | 3 +
flang/include/flang/Parser/parsing.h | 5 +
flang/lib/Frontend/CompilerInvocation.cpp | 1 +
flang/lib/Frontend/FrontendActions.cpp | 4 +-
flang/lib/Parser/parsing.cpp | 18 ++-
flang/lib/Parser/preprocessor.cpp | 128 ++++++++++++++++++
flang/lib/Parser/preprocessor.h | 6 +
flang/test/Driver/driver-help-hidden.f90 | 1 +
flang/test/Driver/driver-help.f90 | 2 +
flang/test/Preprocessing/show-macros1.F90 | 14 ++
flang/test/Preprocessing/show-macros2.F90 | 6 +
flang/test/Preprocessing/show-macros3.F90 | 10 ++
14 files changed, 196 insertions(+), 9 deletions(-)
create mode 100644 flang/test/Preprocessing/show-macros1.F90
create mode 100644 flang/test/Preprocessing/show-macros2.F90
create mode 100644 flang/test/Preprocessing/show-macros3.F90
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c3e90a70925b78..b1ed29cb1cbc44 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1446,7 +1446,7 @@ def dD : Flag<["-"], "dD">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>
def dI : Flag<["-"], "dI">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Print include directives in -E mode in addition to normal output">,
MarshallingInfoFlag<PreprocessorOutputOpts<"ShowIncludeDirectives">>;
-def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option]>,
+def dM : Flag<["-"], "dM">, Group<d_Group>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Print macro definitions in -E mode instead of normal output">;
def dead__strip : Flag<["-"], "dead_strip">;
def dependency_file : Separate<["-"], "dependency-file">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 70daa699e3a949..bfd07addfca811 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -688,7 +688,10 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(TripleStr));
if (isa<PreprocessJobAction>(JA)) {
- CmdArgs.push_back("-E");
+ CmdArgs.push_back("-E");
+ if (Args.getLastArg(options::OPT_dM)) {
+ CmdArgs.push_back("-dM");
+ }
} else if (isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) {
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
diff --git a/flang/include/flang/Frontend/PreprocessorOptions.h b/flang/include/flang/Frontend/PreprocessorOptions.h
index b2e9ac0e963b73..13a91ee9a184f8 100644
--- a/flang/include/flang/Frontend/PreprocessorOptions.h
+++ b/flang/include/flang/Frontend/PreprocessorOptions.h
@@ -56,6 +56,9 @@ struct PreprocessorOptions {
// -fno-reformat: Emit cooked character stream as -E output
bool noReformat{false};
+ // -dM: Show macro definitions with -dM -E
+ bool showMacros{false};
+
void addMacroDef(llvm::StringRef name) {
macros.emplace_back(std::string(name), false);
}
diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h
index e80d8f724ac8f4..14891c44dacafd 100644
--- a/flang/include/flang/Parser/parsing.h
+++ b/flang/include/flang/Parser/parsing.h
@@ -16,6 +16,7 @@
#include "provenance.h"
#include "flang/Common/Fortran-features.h"
#include "llvm/Support/raw_ostream.h"
+#include <memory>
#include <optional>
#include <string>
#include <utility>
@@ -23,6 +24,8 @@
namespace Fortran::parser {
+class Preprocessor;
+
struct Options {
Options() {}
@@ -59,6 +62,7 @@ class Parsing {
const SourceFile *Prescan(const std::string &path, Options);
void EmitPreprocessedSource(
llvm::raw_ostream &, bool lineDirectives = true) const;
+ void EmitPreprocessorMacros(llvm::raw_ostream &) const;
void DumpCookedChars(llvm::raw_ostream &) const;
void DumpProvenance(llvm::raw_ostream &) const;
void DumpParsingLog(llvm::raw_ostream &) const;
@@ -83,6 +87,7 @@ class Parsing {
const char *finalRestingPlace_{nullptr};
std::optional<Program> parseTree_;
ParsingLog log_;
+ std::unique_ptr<Preprocessor> preprocessor_;
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_PARSING_H_
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index c830c7af2462c9..8ce6ab7baf4812 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -772,6 +772,7 @@ static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts,
opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat);
opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P);
+ opts.showMacros = args.hasArg(clang::driver::options::OPT_dM);
}
/// Parses all semantic related arguments and populates the variables
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 849b3c8e4dc027..8f251997ed401b 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -399,7 +399,9 @@ void PrintPreprocessedAction::executeAction() {
// Format or dump the prescanner's output
CompilerInstance &ci = this->getInstance();
- if (ci.getInvocation().getPreprocessorOpts().noReformat) {
+ if (ci.getInvocation().getPreprocessorOpts().showMacros) {
+ ci.getParsing().EmitPreprocessorMacros(outForPP);
+ } else if (ci.getInvocation().getPreprocessorOpts().noReformat) {
ci.getParsing().DumpCookedChars(outForPP);
} else {
ci.getParsing().EmitPreprocessedSource(
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index a55d33bf6b91d6..ec008be1fcea9d 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -60,20 +60,20 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
}
}
- Preprocessor preprocessor{allSources};
+ preprocessor_ = std::make_unique<Preprocessor>(allSources);
if (!options.predefinitions.empty()) {
- preprocessor.DefineStandardMacros();
+ preprocessor_->DefineStandardMacros();
for (const auto &predef : options.predefinitions) {
if (predef.second) {
- preprocessor.Define(predef.first, *predef.second);
+ preprocessor_->Define(predef.first, *predef.second);
} else {
- preprocessor.Undefine(predef.first);
+ preprocessor_->Undefine(predef.first);
}
}
}
currentCooked_ = &allCooked_.NewCookedSource();
Prescanner prescanner{
- messages_, *currentCooked_, preprocessor, options.features};
+ messages_, *currentCooked_, *preprocessor_, options.features};
prescanner.set_fixedForm(options.isFixedForm)
.set_fixedFormColumnLimit(options.fixedFormColumns)
.AddCompilerDirectiveSentinel("dir$");
@@ -87,7 +87,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
if (options.features.IsEnabled(LanguageFeature::CUDA)) {
prescanner.AddCompilerDirectiveSentinel("$cuf");
prescanner.AddCompilerDirectiveSentinel("@cuf");
- preprocessor.Define("_CUDA", "1");
+ preprocessor_->Define("_CUDA", "1");
}
ProvenanceRange range{allSources.AddIncludedFile(
*sourceFile, ProvenanceRange{}, options.isModuleFile)};
@@ -107,6 +107,12 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
return sourceFile;
}
+void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const {
+ if (preprocessor_) {
+ preprocessor_->PrintMacros(out);
+ }
+}
+
void Parsing::EmitPreprocessedSource(
llvm::raw_ostream &out, bool lineDirectives) const {
const std::string *sourcePath{nullptr};
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 515b8f62daf9ad..37e357e91d6f45 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -11,6 +11,9 @@
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cinttypes>
@@ -46,6 +49,110 @@ bool Definition::set_isDisabled(bool disable) {
return was;
}
+void Definition::Print(
+ llvm::raw_ostream &out, llvm::StringRef macroName) const {
+ if (isDisabled_) {
+ return;
+ }
+ if (!isFunctionLike_) {
+ // If it's not a function-like macro, then just print the replacement.
+ out << ' ' << replacement_.ToString();
+ return;
+ }
+
+ // The sequence of characters from which argument names will be created.
+ static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
+
+ auto couldCollide = [&](llvm::StringRef str) {
+ return !str.empty() && llvm::all_of(str, [&](char c) {
+ return charSeq.find(c) != llvm::StringRef::npos;
+ });
+ };
+
+ // For function-like macros we need to invent valid argument names (they
+ // are represented as ~A, ~B, ...). These invented names cannot collide
+ // with any other tokens in the macro definitions.
+ llvm::SmallSet<std::string, 10> usedNames;
+ for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
+ std::string tok{replacement_.TokenAt(i).ToString()};
+ if (tok.empty()) {
+ continue;
+ }
+ // The generated names will only use characters from `charSeq`, so
+ // collect names that could collide, and ignore others.
+ if (couldCollide(tok)) {
+ usedNames.insert(tok);
+ }
+ }
+ if (couldCollide(macroName)) {
+ usedNames.insert(macroName.str());
+ }
+
+ // Given a string that is either empty, or composed from characters
+ // from `charSeq`, create the next string in the lexicographical
+ // order.
+ auto getNextString = [&](llvm::StringRef str) {
+ if (str.empty()) {
+ return charSeq.take_front().str();
+ }
+ if (str.back() == charSeq.back()) {
+ return (llvm::Twine(str) + charSeq.take_front()).str();
+ }
+ size_t idx{charSeq.find(str.back())};
+ return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str();
+ };
+
+ // Generate consecutive arg names, until we get one that works
+ // (i.e. doesn't collide with existing names). Give up after 4096
+ // attempts.
+ auto genArgName = [&](std::string name) {
+ for (size_t x{0}; x != 4096; ++x) {
+ name = getNextString(name);
+ if (!usedNames.contains(name))
+ return name;
+ }
+ return std::string();
+ };
+
+ std::string nextName;
+ llvm::SmallVector<std::string> argNames;
+ for (size_t i{0}; i != argumentCount_; ++i) {
+ nextName = genArgName(nextName);
+ if (nextName.empty()) {
+ out << " // unable to print";
+ return;
+ }
+ argNames.push_back(nextName);
+ }
+
+ // Finally, print the macro.
+ out << '(';
+ for (size_t i{0}; i != argumentCount_; ++i) {
+ if (i != 0) {
+ out << ", ";
+ }
+ out << argNames[i];
+ }
+ if (isVariadic_) {
+ out << ", ...";
+ }
+ out << ") ";
+
+ for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
+ std::string tok{replacement_.TokenAt(i).ToString()};
+ if (tok.size() >= 2 && tok[0] == '~') {
+ // This should be an argument name. The `Tokenize` function only
+ // generates a single character.
+ size_t idx{static_cast<size_t>(tok[1] - 'A')};
+ if (idx < argumentCount_) {
+ out << argNames[idx];
+ continue;
+ }
+ }
+ out << tok;
+ }
+}
+
static bool IsLegalIdentifierStart(const CharBlock &cpl) {
return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}
@@ -713,6 +820,27 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
}
+void Preprocessor::PrintMacros(llvm::raw_ostream &out) const {
+ // Sort the entries by macro name.
+ llvm::SmallVector<decltype(definitions_)::const_iterator> entries;
+ for (auto it{definitions_.begin()}, e{definitions_.end()}; it != e; ++it) {
+ entries.push_back(it);
+ }
+ llvm::sort(entries, [](const auto it1, const auto it2) {
+ return it1->first.ToString() < it2->first.ToString();
+ });
+
+ for (auto &&it : entries) {
+ const auto &[name, def]{*it};
+ if (def.isDisabled()) {
+ continue;
+ }
+ out << "#define " << name;
+ def.Print(out, name.ToString());
+ out << '\n';
+ }
+}
+
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
names_.push_back(t.ToString());
return {names_.back().data(), names_.back().size()};
diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index b61f1577727beb..b4177766f81c4b 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -18,6 +18,8 @@
#include "token-sequence.h"
#include "flang/Parser/char-block.h"
#include "flang/Parser/provenance.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <list>
#include <stack>
@@ -49,6 +51,8 @@ class Definition {
TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &);
+ void Print(llvm::raw_ostream &out, llvm::StringRef macroName = "") const;
+
private:
static TokenSequence Tokenize(const std::vector<std::string> &argNames,
const TokenSequence &token, std::size_t firstToken, std::size_t tokens);
@@ -89,6 +93,8 @@ class Preprocessor {
// Implements a preprocessor directive.
void Directive(const TokenSequence &, Prescanner &);
+ void PrintMacros(llvm::raw_ostream &out) const;
+
private:
enum class IsElseActive { No, Yes };
enum class CanDeadElseAppear { No, Yes };
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index bf3660d57cbb4f..fd2b0e41e38c56 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -21,6 +21,7 @@
! CHECK-NEXT: -ccc-print-phases Dump list of actions to perform
! CHECK-NEXT: -cpp Enable predefined and command line preprocessor macros
! CHECK-NEXT: -c Only run preprocess, compile, and assemble steps
+! CHECK-NEXT: -dM Print macro definitions in -E mode instead of normal output
! CHECK-NEXT: -dumpmachine Display the compiler's target processor
! CHECK-NEXT: -dumpversion Display the version of the compiler
! CHECK-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index b4280a454e3128..368cab97d8547f 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -17,6 +17,7 @@
! HELP-NEXT: -### Print (but do not run) the commands to run for this compilation
! HELP-NEXT: -cpp Enable predefined and command line preprocessor macros
! HELP-NEXT: -c Only run preprocess, compile, and assemble steps
+! HELP-NEXT: -dM Print macro definitions in -E mode instead of normal output
! HELP-NEXT: -dumpmachine Display the compiler's target processor
! HELP-NEXT: -dumpversion Display the version of the compiler
! HELP-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
@@ -152,6 +153,7 @@
! HELP-FC1-NEXT:OPTIONS:
! HELP-FC1-NEXT: -cpp Enable predefined and command line preprocessor macros
! HELP-FC1-NEXT: --dependent-lib=<value> Add dependent library
+! HELP-FC1-NEXT: -dM Print macro definitions in -E mode instead of normal output
! HELP-FC1-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
! HELP-FC1-NEXT: -emit-fir Build the parse tree, then lower it to FIR
! HELP-FC1-NEXT: -emit-hlfir Build the parse tree, then lower it to HLFIR
diff --git a/flang/test/Preprocessing/show-macros1.F90 b/flang/test/Preprocessing/show-macros1.F90
new file mode 100644
index 00000000000000..8e3d59a7849f70
--- /dev/null
+++ b/flang/test/Preprocessing/show-macros1.F90
@@ -0,0 +1,14 @@
+! RUN: %flang -dM -E -o - %s | FileCheck %s
+
+! Check the default macros. Omit certain ones such as __LINE__
+! or __FILE__, or target-specific ones, like __x86_64__.
+
+! Macros are printed in alphabetical order.
+
+! CHECK: #define __DATE__
+! CHECK: #define __TIME__
+! CHECK: #define __flang__
+! CHECK: #define __flang_major__
+! CHECK: #define __flang_minor__
+! CHECK: #define __flang_patchlevel__
+
diff --git a/flang/test/Preprocessing/show-macros2.F90 b/flang/test/Preprocessing/show-macros2.F90
new file mode 100644
index 00000000000000..baf52ba8161f11
--- /dev/null
+++ b/flang/test/Preprocessing/show-macros2.F90
@@ -0,0 +1,6 @@
+! RUN: %flang -DFOO -DBAR=FOO -dM -E -o - %s | FileCheck %s
+
+! Check command line definitions
+
+! CHECK: #define BAR FOO
+! CHECK: #define FOO 1
diff --git a/flang/test/Preprocessing/show-macros3.F90 b/flang/test/Preprocessing/show-macros3.F90
new file mode 100644
index 00000000000000..4b07fcf2f505db
--- /dev/null
+++ b/flang/test/Preprocessing/show-macros3.F90
@@ -0,0 +1,10 @@
+! RUN: %flang -dM -E -o - %s | FileCheck %s
+
+! Variadic macro
+#define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__)
+! CHECK: #define FOO1(A, B, ...) bar(bar(A, B), __VA_ARGS__)
+
+! Macro parameter names are synthesized, starting from 'A', B', etc.
+! Make sure the generated names do not collide with existing identifiers.
+#define FOO2(X, Y) (A + X + C + Y)
+! CHECK: #define FOO2(B, D) (A + B + C + D)
>From 7bab3bea49cdecddf2df2f8c5270b4cc274ab60f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 4 Apr 2024 11:53:13 -0500
Subject: [PATCH 2/6] Address review comments
---
flang/lib/Parser/preprocessor.cpp | 65 ++++++++++++++-----------------
1 file changed, 30 insertions(+), 35 deletions(-)
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 37e357e91d6f45..03305024d6932b 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -12,7 +12,6 @@
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -24,6 +23,7 @@
#include <optional>
#include <set>
#include <utility>
+#include <vector>
namespace Fortran::parser {
@@ -51,9 +51,6 @@ bool Definition::set_isDisabled(bool disable) {
void Definition::Print(
llvm::raw_ostream &out, llvm::StringRef macroName) const {
- if (isDisabled_) {
- return;
- }
if (!isFunctionLike_) {
// If it's not a function-like macro, then just print the replacement.
out << ' ' << replacement_.ToString();
@@ -63,16 +60,16 @@ void Definition::Print(
// The sequence of characters from which argument names will be created.
static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
- auto couldCollide = [&](llvm::StringRef str) {
+ auto couldCollide{[&](llvm::StringRef str) {
return !str.empty() && llvm::all_of(str, [&](char c) {
return charSeq.find(c) != llvm::StringRef::npos;
});
- };
+ }};
// For function-like macros we need to invent valid argument names (they
// are represented as ~A, ~B, ...). These invented names cannot collide
// with any other tokens in the macro definitions.
- llvm::SmallSet<std::string, 10> usedNames;
+ std::set<std::string> usedNames;
for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
std::string tok{replacement_.TokenAt(i).ToString()};
if (tok.empty()) {
@@ -91,7 +88,7 @@ void Definition::Print(
// Given a string that is either empty, or composed from characters
// from `charSeq`, create the next string in the lexicographical
// order.
- auto getNextString = [&](llvm::StringRef str) {
+ auto getNextString{[&](llvm::StringRef str) {
if (str.empty()) {
return charSeq.take_front().str();
}
@@ -100,22 +97,22 @@ void Definition::Print(
}
size_t idx{charSeq.find(str.back())};
return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str();
- };
+ }};
// Generate consecutive arg names, until we get one that works
// (i.e. doesn't collide with existing names). Give up after 4096
// attempts.
- auto genArgName = [&](std::string name) {
+ auto genArgName{[&](std::string name) {
for (size_t x{0}; x != 4096; ++x) {
name = getNextString(name);
- if (!usedNames.contains(name))
+ if (usedNames.count(name) == 0)
return name;
}
return std::string();
- };
+ }};
std::string nextName;
- llvm::SmallVector<std::string> argNames;
+ std::vector<std::string> argNames;
for (size_t i{0}; i != argumentCount_; ++i) {
nextName = genArgName(nextName);
if (nextName.empty()) {
@@ -138,18 +135,22 @@ void Definition::Print(
}
out << ") ";
+ auto getArgumentIndex{[&](llvm::StringRef name) -> size_t {
+ if (name.size() >= 2 && name[0] == '~') {
+ // `name` should be an argument name. The `Tokenize` function only
+ // generates a single character.
+ return static_cast<size_t>(name[1] - 'A');
+ }
+ return argumentCount_;
+ }};
+
for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
std::string tok{replacement_.TokenAt(i).ToString()};
- if (tok.size() >= 2 && tok[0] == '~') {
- // This should be an argument name. The `Tokenize` function only
- // generates a single character.
- size_t idx{static_cast<size_t>(tok[1] - 'A')};
- if (idx < argumentCount_) {
- out << argNames[idx];
- continue;
- }
+ if (size_t idx = getArgumentIndex(tok); idx < argumentCount_) {
+ out << argNames[idx];
+ } else {
+ out << tok;
}
- out << tok;
}
}
@@ -821,22 +822,16 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
void Preprocessor::PrintMacros(llvm::raw_ostream &out) const {
- // Sort the entries by macro name.
- llvm::SmallVector<decltype(definitions_)::const_iterator> entries;
- for (auto it{definitions_.begin()}, e{definitions_.end()}; it != e; ++it) {
- entries.push_back(it);
+ // std::set is ordered. Use that to print the macros in
+ // alphabetical order.
+ std::set<std::string> macroNames;
+ for (const auto &[name, _] : definitions_) {
+ macroNames.insert(name.ToString());
}
- llvm::sort(entries, [](const auto it1, const auto it2) {
- return it1->first.ToString() < it2->first.ToString();
- });
- for (auto &&it : entries) {
- const auto &[name, def]{*it};
- if (def.isDisabled()) {
- continue;
- }
+ for (const std::string &name : macroNames) {
out << "#define " << name;
- def.Print(out, name.ToString());
+ definitions_.at(name).Print(out, name);
out << '\n';
}
}
>From b3993e96744fe0ef15461b38316b28f75fc91b01 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 4 Apr 2024 13:18:51 -0500
Subject: [PATCH 3/6] Store argument names, remove argument name generation
---
flang/lib/Parser/preprocessor.cpp | 105 ++++------------------
flang/lib/Parser/preprocessor.h | 8 +-
flang/test/Preprocessing/show-macros3.F90 | 9 +-
3 files changed, 29 insertions(+), 93 deletions(-)
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 03305024d6932b..30f5ff4df8f5d1 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -32,10 +32,9 @@ Definition::Definition(
: replacement_{Tokenize({}, repl, firstToken, tokens)} {}
Definition::Definition(const std::vector<std::string> &argNames,
- const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
- bool isVariadic)
- : isFunctionLike_{true},
- argumentCount_(argNames.size()), isVariadic_{isVariadic},
+ const TokenSequence &repl, std::size_t firstToken,
+ std::size_t tokens, bool isVariadic)
+ : isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames},
replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
Definition::Definition(const std::string &predefined, AllSources &sources)
@@ -57,97 +56,24 @@ void Definition::Print(
return;
}
- // The sequence of characters from which argument names will be created.
- static llvm::StringRef charSeq{"ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
+ size_t argCount{argumentCount()};
- auto couldCollide{[&](llvm::StringRef str) {
- return !str.empty() && llvm::all_of(str, [&](char c) {
- return charSeq.find(c) != llvm::StringRef::npos;
- });
- }};
-
- // For function-like macros we need to invent valid argument names (they
- // are represented as ~A, ~B, ...). These invented names cannot collide
- // with any other tokens in the macro definitions.
- std::set<std::string> usedNames;
- for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
- std::string tok{replacement_.TokenAt(i).ToString()};
- if (tok.empty()) {
- continue;
- }
- // The generated names will only use characters from `charSeq`, so
- // collect names that could collide, and ignore others.
- if (couldCollide(tok)) {
- usedNames.insert(tok);
- }
- }
- if (couldCollide(macroName)) {
- usedNames.insert(macroName.str());
- }
-
- // Given a string that is either empty, or composed from characters
- // from `charSeq`, create the next string in the lexicographical
- // order.
- auto getNextString{[&](llvm::StringRef str) {
- if (str.empty()) {
- return charSeq.take_front().str();
- }
- if (str.back() == charSeq.back()) {
- return (llvm::Twine(str) + charSeq.take_front()).str();
- }
- size_t idx{charSeq.find(str.back())};
- return (llvm::Twine(str.drop_back()) + charSeq.substr(idx + 1, 1)).str();
- }};
-
- // Generate consecutive arg names, until we get one that works
- // (i.e. doesn't collide with existing names). Give up after 4096
- // attempts.
- auto genArgName{[&](std::string name) {
- for (size_t x{0}; x != 4096; ++x) {
- name = getNextString(name);
- if (usedNames.count(name) == 0)
- return name;
- }
- return std::string();
- }};
-
- std::string nextName;
- std::vector<std::string> argNames;
- for (size_t i{0}; i != argumentCount_; ++i) {
- nextName = genArgName(nextName);
- if (nextName.empty()) {
- out << " // unable to print";
- return;
- }
- argNames.push_back(nextName);
- }
-
- // Finally, print the macro.
out << '(';
- for (size_t i{0}; i != argumentCount_; ++i) {
+ for (size_t i{0}; i != argCount; ++i) {
if (i != 0) {
out << ", ";
}
- out << argNames[i];
+ out << argNames_[i];
}
if (isVariadic_) {
out << ", ...";
}
out << ") ";
- auto getArgumentIndex{[&](llvm::StringRef name) -> size_t {
- if (name.size() >= 2 && name[0] == '~') {
- // `name` should be an argument name. The `Tokenize` function only
- // generates a single character.
- return static_cast<size_t>(name[1] - 'A');
- }
- return argumentCount_;
- }};
-
for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
std::string tok{replacement_.TokenAt(i).ToString()};
- if (size_t idx = getArgumentIndex(tok); idx < argumentCount_) {
- out << argNames[idx];
+ if (size_t idx = getArgumentIndex(tok); idx < argCount) {
+ out << argNames_[idx];
} else {
out << tok;
}
@@ -181,6 +107,13 @@ TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
return result;
}
+std::size_t Definition::getArgumentIndex(const CharBlock &token) const {
+ if (token.size() >= 2 && token[0] == '~') {
+ return static_cast<size_t>(token[1] - 'A');
+ }
+ return argumentCount();
+}
+
static TokenSequence Stringify(
const TokenSequence &tokens, AllSources &allSources) {
TokenSequence result;
@@ -267,7 +200,7 @@ TokenSequence Definition::Apply(
continue;
}
if (bytes == 2 && token[0] == '~') { // argument substitution
- std::size_t index = token[1] - 'A';
+ std::size_t index = getArgumentIndex(token);
if (index >= args.size()) {
continue;
}
@@ -310,8 +243,8 @@ TokenSequence Definition::Apply(
Provenance commaProvenance{
prescanner.preprocessor().allSources().CompilerInsertionProvenance(
',')};
- for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
- if (k > argumentCount_) {
+ for (std::size_t k{argumentCount()}; k < args.size(); ++k) {
+ if (k > argumentCount()) {
result.Put(","s, commaProvenance);
}
result.Put(args[k]);
@@ -320,7 +253,7 @@ TokenSequence Definition::Apply(
j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
parenthesesNesting == 0) {
parenthesesNesting = 1;
- skipping = args.size() == argumentCount_;
+ skipping = args.size() == argumentCount();
++j;
} else {
if (parenthesesNesting > 0) {
diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index b4177766f81c4b..8828fe944aab50 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -41,7 +41,7 @@ class Definition {
Definition(const std::string &predefined, AllSources &);
bool isFunctionLike() const { return isFunctionLike_; }
- std::size_t argumentCount() const { return argumentCount_; }
+ std::size_t argumentCount() const { return argNames_.size(); }
bool isVariadic() const { return isVariadic_; }
bool isDisabled() const { return isDisabled_; }
bool isPredefined() const { return isPredefined_; }
@@ -56,12 +56,16 @@ class Definition {
private:
static TokenSequence Tokenize(const std::vector<std::string> &argNames,
const TokenSequence &token, std::size_t firstToken, std::size_t tokens);
+ // For a given token, return the index of the argument to which the token
+ // corresponds, or `argumentCount` if the token does not correspond to any
+ // argument.
+ std::size_t getArgumentIndex(const CharBlock &token) const;
bool isFunctionLike_{false};
- std::size_t argumentCount_{0};
bool isVariadic_{false};
bool isDisabled_{false};
bool isPredefined_{false};
+ std::vector<std::string> argNames_;
TokenSequence replacement_;
};
diff --git a/flang/test/Preprocessing/show-macros3.F90 b/flang/test/Preprocessing/show-macros3.F90
index 4b07fcf2f505db..951a1ec5ba16f4 100644
--- a/flang/test/Preprocessing/show-macros3.F90
+++ b/flang/test/Preprocessing/show-macros3.F90
@@ -2,9 +2,8 @@
! Variadic macro
#define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__)
-! CHECK: #define FOO1(A, B, ...) bar(bar(A, B), __VA_ARGS__)
+! CHECK: #define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__)
-! Macro parameter names are synthesized, starting from 'A', B', etc.
-! Make sure the generated names do not collide with existing identifiers.
-#define FOO2(X, Y) (A + X + C + Y)
-! CHECK: #define FOO2(B, D) (A + B + C + D)
+! Macro with an unused parameter
+#define FOO2(X, Y, Z) (X + Z)
+! CHECK: #define FOO2(X, Y, Z) (X + Z)
>From baeaa9d4ce7178b15358472ee6f05b48fce144b0 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 4 Apr 2024 13:27:13 -0500
Subject: [PATCH 4/6] clang-format
---
flang/lib/Parser/preprocessor.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 30f5ff4df8f5d1..bd725ee2b49bfc 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -32,8 +32,8 @@ Definition::Definition(
: replacement_{Tokenize({}, repl, firstToken, tokens)} {}
Definition::Definition(const std::vector<std::string> &argNames,
- const TokenSequence &repl, std::size_t firstToken,
- std::size_t tokens, bool isVariadic)
+ const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
+ bool isVariadic)
: isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames},
replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
>From 1802510cb73b3a406facf739c40233194732d376 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 4 Apr 2024 12:08:45 -0500
Subject: [PATCH 5/6] Move preprocessor.h and token-sequence.h to flang/include
directory
preprocessor.h is moved to make the definition of Preprocessor available
in parsing.h.
token-sequence.h is moved because preprocessor.h depends on it.
---
clang/docs/tools/clang-formatted-files.txt | 4 ++--
flang/{lib => include/flang}/Parser/preprocessor.h | 2 +-
flang/{lib => include/flang}/Parser/token-sequence.h | 0
flang/lib/Parser/parsing.cpp | 2 +-
flang/lib/Parser/preprocessor.cpp | 3 ++-
flang/lib/Parser/prescan.cpp | 4 ++--
flang/lib/Parser/prescan.h | 2 +-
flang/lib/Parser/token-sequence.cpp | 3 ++-
8 files changed, 11 insertions(+), 9 deletions(-)
rename flang/{lib => include/flang}/Parser/preprocessor.h (99%)
rename flang/{lib => include/flang}/Parser/token-sequence.h (100%)
diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 70687c23b15e61..8fd4fed25a32a1 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -2147,8 +2147,10 @@ flang/include/flang/Parser/message.h
flang/include/flang/Parser/parse-state.h
flang/include/flang/Parser/parse-tree-visitor.h
flang/include/flang/Parser/parsing.h
+flang/include/flang/Parser/preprocessor.h
flang/include/flang/Parser/provenance.h
flang/include/flang/Parser/source.h
+flang/include/flang/Parser/token-sequence.h
flang/include/flang/Parser/tools.h
flang/include/flang/Parser/unparse.h
flang/include/flang/Parser/user-state.h
@@ -2319,7 +2321,6 @@ flang/lib/Parser/openmp-parsers.cpp
flang/lib/Parser/parse-tree.cpp
flang/lib/Parser/parsing.cpp
flang/lib/Parser/preprocessor.cpp
-flang/lib/Parser/preprocessor.h
flang/lib/Parser/prescan.cpp
flang/lib/Parser/prescan.h
flang/lib/Parser/program-parsers.cpp
@@ -2328,7 +2329,6 @@ flang/lib/Parser/source.cpp
flang/lib/Parser/stmt-parser.h
flang/lib/Parser/token-parsers.h
flang/lib/Parser/token-sequence.cpp
-flang/lib/Parser/token-sequence.h
flang/lib/Parser/tools.cpp
flang/lib/Parser/type-parser-implementation.h
flang/lib/Parser/type-parsers.h
diff --git a/flang/lib/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h
similarity index 99%
rename from flang/lib/Parser/preprocessor.h
rename to flang/include/flang/Parser/preprocessor.h
index 8828fe944aab50..f5f94cdf78dc8e 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/include/flang/Parser/preprocessor.h
@@ -15,9 +15,9 @@
// performed, so that special compiler command options &/or source file name
// extensions for preprocessing will not be necessary.
-#include "token-sequence.h"
#include "flang/Parser/char-block.h"
#include "flang/Parser/provenance.h"
+#include "flang/Parser/token-sequence.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
diff --git a/flang/lib/Parser/token-sequence.h b/flang/include/flang/Parser/token-sequence.h
similarity index 100%
rename from flang/lib/Parser/token-sequence.h
rename to flang/include/flang/Parser/token-sequence.h
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index ec008be1fcea9d..39c2fdcab19d55 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -7,10 +7,10 @@
//===----------------------------------------------------------------------===//
#include "flang/Parser/parsing.h"
-#include "preprocessor.h"
#include "prescan.h"
#include "type-parsers.h"
#include "flang/Parser/message.h"
+#include "flang/Parser/preprocessor.h"
#include "flang/Parser/provenance.h"
#include "flang/Parser/source.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index bd725ee2b49bfc..1c41bdad29584f 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "preprocessor.h"
+#include "flang/Parser/preprocessor.h"
+
#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index e9b23172ed2e28..96db3955299f33 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===//
#include "prescan.h"
-#include "preprocessor.h"
-#include "token-sequence.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
+#include "flang/Parser/preprocessor.h"
#include "flang/Parser/source.h"
+#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 7442b5d2263354..581980001bcc23 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -16,11 +16,11 @@
// fixed form character literals on truncated card images, file
// inclusion, and driving the Fortran source preprocessor.
-#include "token-sequence.h"
#include "flang/Common/Fortran-features.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/provenance.h"
+#include "flang/Parser/token-sequence.h"
#include <bitset>
#include <optional>
#include <string>
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 799d13a423660c..d0254ecd5aaefc 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "token-sequence.h"
+#include "flang/Parser/token-sequence.h"
+
#include "prescan.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
>From 477937569028e0a7f253512d571ae291265ea01c Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 4 Apr 2024 12:35:50 -0500
Subject: [PATCH 6/6] Make Preprocessor not be dynamically allocated
---
flang/include/flang/Parser/parsing.h | 6 ++----
flang/lib/Parser/parsing.cpp | 15 ++++++---------
2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/flang/include/flang/Parser/parsing.h b/flang/include/flang/Parser/parsing.h
index 14891c44dacafd..4d329c189cb80e 100644
--- a/flang/include/flang/Parser/parsing.h
+++ b/flang/include/flang/Parser/parsing.h
@@ -15,8 +15,8 @@
#include "parse-tree.h"
#include "provenance.h"
#include "flang/Common/Fortran-features.h"
+#include "flang/Parser/preprocessor.h"
#include "llvm/Support/raw_ostream.h"
-#include <memory>
#include <optional>
#include <string>
#include <utility>
@@ -24,8 +24,6 @@
namespace Fortran::parser {
-class Preprocessor;
-
struct Options {
Options() {}
@@ -87,7 +85,7 @@ class Parsing {
const char *finalRestingPlace_{nullptr};
std::optional<Program> parseTree_;
ParsingLog log_;
- std::unique_ptr<Preprocessor> preprocessor_;
+ Preprocessor preprocessor_{allCooked_.allSources()};
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_PARSING_H_
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index 39c2fdcab19d55..43a898ff120c5d 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -60,20 +60,19 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
}
}
- preprocessor_ = std::make_unique<Preprocessor>(allSources);
if (!options.predefinitions.empty()) {
- preprocessor_->DefineStandardMacros();
+ preprocessor_.DefineStandardMacros();
for (const auto &predef : options.predefinitions) {
if (predef.second) {
- preprocessor_->Define(predef.first, *predef.second);
+ preprocessor_.Define(predef.first, *predef.second);
} else {
- preprocessor_->Undefine(predef.first);
+ preprocessor_.Undefine(predef.first);
}
}
}
currentCooked_ = &allCooked_.NewCookedSource();
Prescanner prescanner{
- messages_, *currentCooked_, *preprocessor_, options.features};
+ messages_, *currentCooked_, preprocessor_, options.features};
prescanner.set_fixedForm(options.isFixedForm)
.set_fixedFormColumnLimit(options.fixedFormColumns)
.AddCompilerDirectiveSentinel("dir$");
@@ -87,7 +86,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
if (options.features.IsEnabled(LanguageFeature::CUDA)) {
prescanner.AddCompilerDirectiveSentinel("$cuf");
prescanner.AddCompilerDirectiveSentinel("@cuf");
- preprocessor_->Define("_CUDA", "1");
+ preprocessor_.Define("_CUDA", "1");
}
ProvenanceRange range{allSources.AddIncludedFile(
*sourceFile, ProvenanceRange{}, options.isModuleFile)};
@@ -108,9 +107,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
}
void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const {
- if (preprocessor_) {
- preprocessor_->PrintMacros(out);
- }
+ preprocessor_.PrintMacros(out);
}
void Parsing::EmitPreprocessedSource(
More information about the cfe-commits
mailing list