[flang-commits] [flang] d988991 - [flang] Tokenize all -D macro bodies, and do it better (#168116)

via flang-commits flang-commits at lists.llvm.org
Wed Nov 19 08:54:04 PST 2025


Author: Peter Klausler
Date: 2025-11-19T08:53:59-08:00
New Revision: d988991f9f6b50941ecbffc316890342147a9f75

URL: https://github.com/llvm/llvm-project/commit/d988991f9f6b50941ecbffc316890342147a9f75
DIFF: https://github.com/llvm/llvm-project/commit/d988991f9f6b50941ecbffc316890342147a9f75.diff

LOG: [flang] Tokenize all -D macro bodies, and do it better (#168116)

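The compiler presently tokenizes the bodies of only function-like macro
definitions from the command line, and does so crudely: it merely
alternates between runs of identifier characters and runs of everything
else. Tokenize the bodies of keyword-like macros too, get character
literals right, and handle numeric constants correctly, so that a
definition such as -DNVAR=2+1+0+0 can be used in an #if expression.
(Also delete two needless functions noticed in characters.h.)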

Fixes https://github.com/llvm/llvm-project/issues/168077.

Added: 
    flang/test/Preprocessing/bug168077.F90

Modified: 
    flang/include/flang/Parser/characters.h
    flang/include/flang/Parser/preprocessor.h
    flang/lib/Parser/preprocessor.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Parser/characters.h b/flang/include/flang/Parser/characters.h
index dbdc058c44995..3761700ad348c 100644
--- a/flang/include/flang/Parser/characters.h
+++ b/flang/include/flang/Parser/characters.h
@@ -69,10 +69,6 @@ inline constexpr char ToLowerCaseLetter(char ch) {
   return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
 }
 
-inline constexpr char ToLowerCaseLetter(char &&ch) {
-  return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
-}
-
 inline std::string ToLowerCaseLetters(std::string_view str) {
   std::string lowered{str};
   for (char &ch : lowered) {
@@ -85,10 +81,6 @@ inline constexpr char ToUpperCaseLetter(char ch) {
   return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
 }
 
-inline constexpr char ToUpperCaseLetter(char &&ch) {
-  return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
-}
-
 inline std::string ToUpperCaseLetters(std::string_view str) {
   std::string raised{str};
   for (char &ch : raised) {

diff  --git a/flang/include/flang/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h
index bb13b4463fa80..0405d42e64f7b 100644
--- a/flang/include/flang/Parser/preprocessor.h
+++ b/flang/include/flang/Parser/preprocessor.h
@@ -38,6 +38,7 @@ class Definition {
   Definition(const std::vector<std::string> &argNames, const TokenSequence &,
       std::size_t firstToken, std::size_t tokens, bool isVariadic = false);
   Definition(const std::string &predefined, AllSources &);
+  Definition(const TokenSequence &predefined);
 
   bool isFunctionLike() const { return isFunctionLike_; }
   std::size_t argumentCount() const { return argNames_.size(); }

diff  --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 9176b4db3408a..529d2c345c112 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -43,6 +43,9 @@ Definition::Definition(const std::string &predefined, AllSources &sources)
       replacement_{
           predefined, sources.AddCompilerInsertion(predefined).start()} {}
 
+Definition::Definition(const TokenSequence &repl)
+    : isPredefined_{true}, replacement_{repl} {}
+
 bool Definition::set_isDisabled(bool disable) {
   bool was{isDisabled_};
   isDisabled_ = disable;
@@ -371,40 +374,66 @@ TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) {
   Provenance provenance{allSources_.AddCompilerInsertion(str).start()};
   auto end{str.size()};
   for (std::string::size_type at{0}; at < end;) {
-    // Alternate between tokens that are identifiers (and therefore subject
-    // to argument replacement) and those that are not.
-    auto start{str.find_first_of(idChars, at)};
-    if (start == str.npos) {
-      tokens.Put(str.substr(at), provenance + at);
-      break;
-    } else if (start > at) {
-      tokens.Put(str.substr(at, start - at), provenance + at);
+    char ch{str.at(at)};
+    if (IsWhiteSpace(ch)) {
+      ++at;
+      continue;
     }
-    at = str.find_first_not_of(idChars, start + 1);
-    if (at == str.npos) {
+    std::string::size_type start{at};
+    if (IsLegalIdentifierStart(ch)) {
+      for (++at; at < end && IsLegalInIdentifier(str.at(at)); ++at) {
+      }
+    } else if (IsDecimalDigit(ch) || ch == '.') {
+      for (++at; at < end; ++at) {
+        ch = str.at(at);
+        if (!IsDecimalDigit(ch) && ch != '.') {
+          break;
+        }
+      }
+      if (at < end) {
+        ch = ToUpperCaseLetter(str.at(at));
+        if (ch == 'E' || ch == 'D' || ch == 'Q') {
+          if (++at < end) {
+            ch = str.at(at);
+            if (ch == '+' || ch == '-') {
+              ++at;
+            }
+            for (; at < end && IsDecimalDigit(str.at(at)); ++at) {
+            }
+          }
+        }
+      }
+    } else if (ch == '\'' || ch == '"') {
+      for (++at; at < end && str.at(at) != ch; ++at) {
+      }
+      if (at < end) {
+        ++at;
+      }
+    } else {
+      ++at; // single-character token
+    }
+    if (at >= end || at == str.npos) {
       tokens.Put(str.substr(start), provenance + start);
       break;
-    } else {
-      tokens.Put(str.substr(start, at - start), provenance + start);
     }
+    tokens.Put(str.substr(start, at - start), provenance + start);
   }
   return tokens;
 }
 
 void Preprocessor::Define(const std::string &macro, const std::string &value) {
+  TokenSequence rhs{TokenizeMacroBody(value)};
   if (auto lhs{TokenizeMacroNameAndArgs(macro)}) {
     // function-like macro
     CharBlock macroName{SaveTokenAsName(lhs->front())};
     auto iter{lhs->begin()};
     ++iter;
     std::vector<std::string> argNames{iter, lhs->end()};
-    auto rhs{TokenizeMacroBody(value)};
     definitions_.emplace(std::make_pair(macroName,
         Definition{
             argNames, rhs, 0, rhs.SizeInTokens(), /*isVariadic=*/false}));
   } else { // keyword macro
-    definitions_.emplace(
-        SaveTokenAsName(macro), Definition{value, allSources_});
+    definitions_.emplace(SaveTokenAsName(macro), Definition{rhs});
   }
 }
 

diff  --git a/flang/test/Preprocessing/bug168077.F90 b/flang/test/Preprocessing/bug168077.F90
new file mode 100644
index 0000000000000..d6c1bb0fa0010
--- /dev/null
+++ b/flang/test/Preprocessing/bug168077.F90
@@ -0,0 +1,6 @@
+!RUN: %flang -E -DNVAR=2+1+0+0 %s 2>&1 | FileCheck %s
+!CHECK: pass
+#if NVAR > 2
+call pass
+#endif
+end


        


More information about the flang-commits mailing list