[flang-commits] [flang] [flang] Handle preprocessor macro expansion edge case (PR #73835)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Wed Nov 29 10:10:17 PST 2023


https://github.com/klausler created https://github.com/llvm/llvm-project/pull/73835

When a reference to a function-like macro begins during the rescanning of the expansion of another macro but is not completed by the end of that expansion, it is necessary to abort the rescanning of that expansion and try again once more tokens can be acquired.  (See the new unclosed-FLM.F90 test case.)  All other Fortran preprocessors to which I have access can handle this situation.

From eb11d2d7df6f5974c6f7c4635491d2965fe9c06b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Tue, 28 Nov 2023 16:51:01 -0800
Subject: [PATCH] [flang] Handle preprocessor macro expansion edge case

When a reference to a function-like macro begins during the
rescanning of the expansion of another macro but is not completed
by the end of that expansion, it is necessary to abort the
rescanning of that expansion and try again once more tokens can
be acquired.  (See the new unclosed-FLM.F90 test case.)  All
other Fortran preprocessors to which I have access can handle
this situation.
---
 flang/lib/Parser/preprocessor.cpp         | 232 +++++++++++++---------
 flang/lib/Parser/preprocessor.h           |  15 +-
 flang/test/Preprocessing/unclosed-FLM.F90 |   7 +
 3 files changed, 152 insertions(+), 102 deletions(-)
 create mode 100644 flang/test/Preprocessing/unclosed-FLM.F90

diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 88efcf71445c879..8c993e7ced0e8c8 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -259,14 +259,15 @@ void Preprocessor::Define(std::string macro, std::string value) {
 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
 
 std::optional<TokenSequence> Preprocessor::MacroReplacement(
-    const TokenSequence &input, Prescanner &prescanner) {
+    const TokenSequence &input, Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
   // Do quick scan for any use of a defined name.
   if (definitions_.empty()) {
     return std::nullopt;
   }
   std::size_t tokens{input.SizeInTokens()};
-  std::size_t j;
-  for (j = 0; j < tokens; ++j) {
+  std::size_t j{0};
+  for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
         IsNameDefined(token)) {
@@ -277,6 +278,38 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     return std::nullopt; // input contains nothing that would be replaced
   }
   TokenSequence result{input, 0, j};
+
+  // After rescanning after macro replacement has failed due to an unclosed
+  // function-like macro call (no left parenthesis yet, or no closing
+  // parenthesis), if tokens remain in the input, append them to the
+  // replacement text and attempt to proceed.  Otherwise, return, so that
+  // the caller may try again with remaining tokens in its input.
+  auto CompleteFunctionLikeMacro{
+      [this, &input, &prescanner, &result, &partialFunctionLikeMacro](
+          std::size_t after, const TokenSequence &replacement,
+          std::size_t pFLMOffset) {
+        if (after < input.SizeInTokens()) {
+          result.Put(replacement, 0, pFLMOffset);
+          TokenSequence suffix;
+          suffix.Put(
+              replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
+          suffix.Put(input, after, input.SizeInTokens() - after);
+          auto further{
+              ReplaceMacros(suffix, prescanner, partialFunctionLikeMacro)};
+          if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
+            // still not closed
+            **partialFunctionLikeMacro += result.SizeInTokens();
+          }
+          result.Put(further);
+          return true;
+        } else {
+          if (partialFunctionLikeMacro) {
+            *partialFunctionLikeMacro = pFLMOffset + result.SizeInTokens();
+          }
+          return false;
+        }
+      }};
+
   for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
@@ -294,20 +327,17 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
       continue;
     }
     if (!def->isFunctionLike()) {
-      bool isRenaming{false};
-      if (def->isPredefined()) {
+      if (def->isPredefined() && !def->replacement().empty()) {
         std::string repl;
-        if (!def->replacement().empty()) {
-          std::string name{def->replacement().TokenAt(0).ToString()};
-          if (name == "__FILE__") {
-            repl = "\""s +
-                allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
-          } else if (name == "__LINE__") {
-            std::string buf;
-            llvm::raw_string_ostream ss{buf};
-            ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
-            repl = ss.str();
-          }
+        std::string name{def->replacement().TokenAt(0).ToString()};
+        if (name == "__FILE__") {
+          repl = "\""s +
+              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
+        } else if (name == "__LINE__") {
+          std::string buf;
+          llvm::raw_string_ostream ss{buf};
+          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
+          repl = ss.str();
         }
         if (!repl.empty()) {
           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
@@ -317,105 +347,109 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
           continue;
         }
       }
+      std::optional<std::size_t> partialFLM;
       def->set_isDisabled(true);
-      TokenSequence replaced{
-          TokenPasting(ReplaceMacros(def->replacement(), prescanner))};
+      TokenSequence replaced{TokenPasting(
+          ReplaceMacros(def->replacement(), prescanner, &partialFLM))};
       def->set_isDisabled(false);
-      // Allow a keyword-like macro replacement to be the name of
-      // a function-like macro, possibly surrounded by blanks.
-      std::size_t k{0}, repTokens{replaced.SizeInTokens()};
-      for (; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
+        return result;
+      }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+    } else {
+      // Possible function-like macro call.  Skip spaces and newlines to see
+      // whether '(' is next.
+      std::size_t k{j};
+      bool leftParen{false};
+      while (++k < tokens) {
+        const CharBlock &lookAhead{input.TokenAt(k)};
+        if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
+          leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
+          break;
+        }
       }
-      if (k < repTokens) {
-        token = replaced.TokenAt(k);
-        for (++k; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (!leftParen) {
+        if (partialFunctionLikeMacro) {
+          *partialFunctionLikeMacro = result.SizeInTokens();
+          result.Put(input, j, tokens - j);
+          return result;
+        } else {
+          result.Put(input, j);
+          continue;
         }
-        if (k == repTokens && IsLegalIdentifierStart(token[0])) {
-          auto it{definitions_.find(token)};
-          if (it != definitions_.end() && !it->second.isDisabled() &&
-              it->second.isFunctionLike()) {
-            def = &it->second;
-            isRenaming = true;
+      }
+      std::vector<std::size_t> argStart{++k};
+      for (int nesting{0}; k < tokens; ++k) {
+        CharBlock token{input.TokenAt(k)};
+        char ch{token.OnlyNonBlank()};
+        if (ch == '(') {
+          ++nesting;
+        } else if (ch == ')') {
+          if (nesting == 0) {
+            break;
           }
+          --nesting;
+        } else if (ch == ',' && nesting == 0) {
+          argStart.push_back(k + 1);
         }
       }
-      if (!isRenaming) {
-        if (!replaced.empty()) {
-          ProvenanceRange from{def->replacement().GetProvenanceRange()};
-          ProvenanceRange use{input.GetTokenProvenanceRange(j)};
-          ProvenanceRange newRange{
-              allSources_.AddMacroCall(from, use, replaced.ToString())};
-          result.Put(replaced, newRange);
-        }
+      if (argStart.size() == 1 && k == argStart[0] &&
+          def->argumentCount() == 0) {
+        // Subtle: () is zero arguments, not one empty argument,
+        // unless one argument was expected.
+        argStart.clear();
+      }
+      if (k >= tokens && partialFunctionLikeMacro) {
+        *partialFunctionLikeMacro = result.SizeInTokens();
+        result.Put(input, j, tokens - j);
+        return result;
+      } else if (k >= tokens || argStart.size() < def->argumentCount() ||
+          (argStart.size() > def->argumentCount() && !def->isVariadic())) {
+        result.Put(input, j);
         continue;
       }
-    }
-    // Possible function-like macro call.  Skip spaces and newlines to see
-    // whether '(' is next.
-    std::size_t k{j};
-    bool leftParen{false};
-    while (++k < tokens) {
-      const CharBlock &lookAhead{input.TokenAt(k)};
-      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
-        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
-        break;
+      std::vector<TokenSequence> args;
+      for (std::size_t n{0}; n < argStart.size(); ++n) {
+        std::size_t at{argStart[n]};
+        std::size_t count{
+            (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
+        args.emplace_back(TokenSequence(input, at, count));
       }
-    }
-    if (!leftParen) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<std::size_t> argStart{++k};
-    for (int nesting{0}; k < tokens; ++k) {
-      CharBlock token{input.TokenAt(k)};
-      char ch{token.OnlyNonBlank()};
-      if (ch == '(') {
-        ++nesting;
-      } else if (ch == ')') {
-        if (nesting == 0) {
-          break;
-        }
-        --nesting;
-      } else if (ch == ',' && nesting == 0) {
-        argStart.push_back(k + 1);
+      TokenSequence applied{def->Apply(args, prescanner)};
+      std::optional<std::size_t> partialFLM;
+      def->set_isDisabled(true);
+      TokenSequence replaced{
+          ReplaceMacros(std::move(applied), prescanner, &partialFLM)};
+      def->set_isDisabled(false);
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
+        return result;
       }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+      j = k; // advance to the terminal ')'
     }
-    if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
-      // Subtle: () is zero arguments, not one empty argument,
-      // unless one argument was expected.
-      argStart.clear();
-    }
-    if (k >= tokens || argStart.size() < def->argumentCount() ||
-        (argStart.size() > def->argumentCount() && !def->isVariadic())) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<TokenSequence> args;
-    for (std::size_t n{0}; n < argStart.size(); ++n) {
-      std::size_t at{argStart[n]};
-      std::size_t count{
-          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
-      args.emplace_back(TokenSequence(input, at, count));
-    }
-    TokenSequence applied{def->Apply(args, prescanner)};
-    def->set_isDisabled(true);
-    TokenSequence replaced{ReplaceMacros(std::move(applied), prescanner)};
-    def->set_isDisabled(false);
-    if (!replaced.empty()) {
-      ProvenanceRange from{def->replacement().GetProvenanceRange()};
-      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
-      ProvenanceRange newRange{
-          allSources_.AddMacroCall(from, use, replaced.ToString())};
-      result.Put(replaced, newRange);
-    }
-    j = k; // advance to the terminal ')'
   }
   return result;
 }
 
-TokenSequence Preprocessor::ReplaceMacros(
-    const TokenSequence &tokens, Prescanner &prescanner) {
-  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
+TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
+    Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
+  if (std::optional<TokenSequence> repl{
+          MacroReplacement(tokens, prescanner, partialFunctionLikeMacro)}) {
     return std::move(*repl);
   }
   return tokens;
diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index e0617a490957574..3b456364944c3d6 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -75,8 +75,16 @@ class Preprocessor {
   bool IsNameDefined(const CharBlock &);
   bool IsFunctionLikeDefinition(const CharBlock &);
 
-  std::optional<TokenSequence> MacroReplacement(
-      const TokenSequence &, Prescanner &);
+  // When called with partialFunctionLikeMacro not null, MacroReplacement()
+  // and ReplaceMacros() handle an unclosed function-like macro reference
+  // by terminating macro replacement at the name of the FLM and returning
+  // its index in the result.  This allows the recursive call sites in
+  // MacroReplacement to append any remaining tokens in their inputs to
+  // that result and try again.  All other Fortran preprocessors share this
+  // behavior.
+  std::optional<TokenSequence> MacroReplacement(const TokenSequence &,
+      Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
 
   // Implements a preprocessor directive.
   void Directive(const TokenSequence &, Prescanner &);
@@ -86,7 +94,8 @@ class Preprocessor {
   enum class CanDeadElseAppear { No, Yes };
 
   CharBlock SaveTokenAsName(const CharBlock &);
-  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &);
+  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner &, ProvenanceRange);
   bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
diff --git a/flang/test/Preprocessing/unclosed-FLM.F90 b/flang/test/Preprocessing/unclosed-FLM.F90
new file mode 100644
index 000000000000000..ed8bdbed2f44e62
--- /dev/null
+++ b/flang/test/Preprocessing/unclosed-FLM.F90
@@ -0,0 +1,7 @@
+! RUN: %flang -E %s | FileCheck %s
+#define A B(c)
+#define B(d) d); call E(d
+#define E(f) G(f)
+!CHECK: call I(c); call G(c)
+call I(A)
+end



More information about the flang-commits mailing list