[flang-commits] [flang] d988991 - [flang] Tokenize all -D macro bodies, and do it better (#168116)
via flang-commits
flang-commits at lists.llvm.org
Wed Nov 19 08:54:04 PST 2025
Author: Peter Klausler
Date: 2025-11-19T08:53:59-08:00
New Revision: d988991f9f6b50941ecbffc316890342147a9f75
URL: https://github.com/llvm/llvm-project/commit/d988991f9f6b50941ecbffc316890342147a9f75
DIFF: https://github.com/llvm/llvm-project/commit/d988991f9f6b50941ecbffc316890342147a9f75.diff
LOG: [flang] Tokenize all -D macro bodies, and do it better (#168116)
The compiler presently tokenizes the bodies of only function-like macro
definitions from the command line, and does so crudely. Tokenize
keyword-like macros too, get character literals right, and handle
numeric constants correctly. (Also delete two needless functions noticed
in characters.h.)
Fixes https://github.com/llvm/llvm-project/issues/168077.
Added:
flang/test/Preprocessing/bug168077.F90
Modified:
flang/include/flang/Parser/characters.h
flang/include/flang/Parser/preprocessor.h
flang/lib/Parser/preprocessor.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Parser/characters.h b/flang/include/flang/Parser/characters.h
index dbdc058c44995..3761700ad348c 100644
--- a/flang/include/flang/Parser/characters.h
+++ b/flang/include/flang/Parser/characters.h
@@ -69,10 +69,6 @@ inline constexpr char ToLowerCaseLetter(char ch) {
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
}
-inline constexpr char ToLowerCaseLetter(char &&ch) {
- return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
-}
-
inline std::string ToLowerCaseLetters(std::string_view str) {
std::string lowered{str};
for (char &ch : lowered) {
@@ -85,10 +81,6 @@ inline constexpr char ToUpperCaseLetter(char ch) {
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
}
-inline constexpr char ToUpperCaseLetter(char &&ch) {
- return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
-}
-
inline std::string ToUpperCaseLetters(std::string_view str) {
std::string raised{str};
for (char &ch : raised) {
diff --git a/flang/include/flang/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h
index bb13b4463fa80..0405d42e64f7b 100644
--- a/flang/include/flang/Parser/preprocessor.h
+++ b/flang/include/flang/Parser/preprocessor.h
@@ -38,6 +38,7 @@ class Definition {
Definition(const std::vector<std::string> &argNames, const TokenSequence &,
std::size_t firstToken, std::size_t tokens, bool isVariadic = false);
Definition(const std::string &predefined, AllSources &);
+ Definition(const TokenSequence &predefined);
bool isFunctionLike() const { return isFunctionLike_; }
std::size_t argumentCount() const { return argNames_.size(); }
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 9176b4db3408a..529d2c345c112 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -43,6 +43,9 @@ Definition::Definition(const std::string &predefined, AllSources &sources)
replacement_{
predefined, sources.AddCompilerInsertion(predefined).start()} {}
+Definition::Definition(const TokenSequence &repl)
+ : isPredefined_{true}, replacement_{repl} {}
+
bool Definition::set_isDisabled(bool disable) {
bool was{isDisabled_};
isDisabled_ = disable;
@@ -371,40 +374,66 @@ TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) {
Provenance provenance{allSources_.AddCompilerInsertion(str).start()};
auto end{str.size()};
for (std::string::size_type at{0}; at < end;) {
- // Alternate between tokens that are identifiers (and therefore subject
- // to argument replacement) and those that are not.
- auto start{str.find_first_of(idChars, at)};
- if (start == str.npos) {
- tokens.Put(str.substr(at), provenance + at);
- break;
- } else if (start > at) {
- tokens.Put(str.substr(at, start - at), provenance + at);
+ char ch{str.at(at)};
+ if (IsWhiteSpace(ch)) {
+ ++at;
+ continue;
}
- at = str.find_first_not_of(idChars, start + 1);
- if (at == str.npos) {
+ std::string::size_type start{at};
+ if (IsLegalIdentifierStart(ch)) {
+ for (++at; at < end && IsLegalInIdentifier(str.at(at)); ++at) {
+ }
+ } else if (IsDecimalDigit(ch) || ch == '.') {
+ for (++at; at < end; ++at) {
+ ch = str.at(at);
+ if (!IsDecimalDigit(ch) && ch != '.') {
+ break;
+ }
+ }
+ if (at < end) {
+ ch = ToUpperCaseLetter(str.at(at));
+ if (ch == 'E' || ch == 'D' || ch == 'Q') {
+ if (++at < end) {
+ ch = str.at(at);
+ if (ch == '+' || ch == '-') {
+ ++at;
+ }
+ for (; at < end && IsDecimalDigit(str.at(at)); ++at) {
+ }
+ }
+ }
+ }
+ } else if (ch == '\'' || ch == '"') {
+ for (++at; at < end && str.at(at) != ch; ++at) {
+ }
+ if (at < end) {
+ ++at;
+ }
+ } else {
+ ++at; // single-character token
+ }
+ if (at >= end || at == str.npos) {
tokens.Put(str.substr(start), provenance + start);
break;
- } else {
- tokens.Put(str.substr(start, at - start), provenance + start);
}
+ tokens.Put(str.substr(start, at - start), provenance + start);
}
return tokens;
}
void Preprocessor::Define(const std::string &macro, const std::string &value) {
+ TokenSequence rhs{TokenizeMacroBody(value)};
if (auto lhs{TokenizeMacroNameAndArgs(macro)}) {
// function-like macro
CharBlock macroName{SaveTokenAsName(lhs->front())};
auto iter{lhs->begin()};
++iter;
std::vector<std::string> argNames{iter, lhs->end()};
- auto rhs{TokenizeMacroBody(value)};
definitions_.emplace(std::make_pair(macroName,
Definition{
argNames, rhs, 0, rhs.SizeInTokens(), /*isVariadic=*/false}));
} else { // keyword macro
- definitions_.emplace(
- SaveTokenAsName(macro), Definition{value, allSources_});
+ definitions_.emplace(SaveTokenAsName(macro), Definition{rhs});
}
}
diff --git a/flang/test/Preprocessing/bug168077.F90 b/flang/test/Preprocessing/bug168077.F90
new file mode 100644
index 0000000000000..d6c1bb0fa0010
--- /dev/null
+++ b/flang/test/Preprocessing/bug168077.F90
@@ -0,0 +1,6 @@
+!RUN: %flang -E -DNVAR=2+1+0+0 %s 2>&1 | FileCheck %s
+!CHECK: pass
+#if NVAR > 2
+call pass
+#endif
+end
More information about the flang-commits
mailing list