[flang-commits] [flang] d3e5c20 - [flang] Handle preprocessor macro expansion edge case (#73835)

via flang-commits flang-commits at lists.llvm.org
Thu Nov 30 13:23:21 PST 2023


Author: Peter Klausler
Date: 2023-11-30T13:23:14-08:00
New Revision: d3e5c20ab846303874a2a25e5877c72271fc798b

URL: https://github.com/llvm/llvm-project/commit/d3e5c20ab846303874a2a25e5877c72271fc798b
DIFF: https://github.com/llvm/llvm-project/commit/d3e5c20ab846303874a2a25e5877c72271fc798b.diff

LOG: [flang] Handle preprocessor macro expansion edge case (#73835)

When a reference to a function-like macro begins during the rescanning
of the expansion of another macro but is not completed by the end of
that expansion, it is necessary to abort the rescanning of that
expansion and try again once more tokens can be acquired. (See the new
unclosed-FLM.F90 test case.) All other Fortran preprocessors to which I
have access can handle this situation.

Added: 
    flang/test/Preprocessing/unclosed-FLM.F90

Modified: 
    flang/lib/Parser/preprocessor.cpp
    flang/lib/Parser/preprocessor.h

Removed: 
    


################################################################################
diff  --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 88efcf71445c879..8c993e7ced0e8c8 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -259,14 +259,15 @@ void Preprocessor::Define(std::string macro, std::string value) {
 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
 
 std::optional<TokenSequence> Preprocessor::MacroReplacement(
-    const TokenSequence &input, Prescanner &prescanner) {
+    const TokenSequence &input, Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
   // Do quick scan for any use of a defined name.
   if (definitions_.empty()) {
     return std::nullopt;
   }
   std::size_t tokens{input.SizeInTokens()};
-  std::size_t j;
-  for (j = 0; j < tokens; ++j) {
+  std::size_t j{0};
+  for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
         IsNameDefined(token)) {
@@ -277,6 +278,38 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     return std::nullopt; // input contains nothing that would be replaced
   }
   TokenSequence result{input, 0, j};
+
+  // When the rescan that follows a macro replacement stops at an unclosed
+  // function-like macro call (no left parenthesis yet, or no closing
+  // parenthesis), and tokens remain in the input, append them to the
+  // replacement text and attempt to proceed.  Otherwise, return, so that
+  // the caller may try again with remaining tokens in its input.
+  auto CompleteFunctionLikeMacro{
+      [this, &input, &prescanner, &result, &partialFunctionLikeMacro](
+          std::size_t after, const TokenSequence &replacement,
+          std::size_t pFLMOffset) {
+        if (after < input.SizeInTokens()) {
+          result.Put(replacement, 0, pFLMOffset);
+          TokenSequence suffix;
+          suffix.Put(
+              replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
+          suffix.Put(input, after, input.SizeInTokens() - after);
+          auto further{
+              ReplaceMacros(suffix, prescanner, partialFunctionLikeMacro)};
+          if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
+            // still not closed
+            **partialFunctionLikeMacro += result.SizeInTokens();
+          }
+          result.Put(further);
+          return true;
+        } else {
+          if (partialFunctionLikeMacro) {
+            *partialFunctionLikeMacro = pFLMOffset + result.SizeInTokens();
+          }
+          return false;
+        }
+      }};
+
   for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
@@ -294,20 +327,17 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
       continue;
     }
     if (!def->isFunctionLike()) {
-      bool isRenaming{false};
-      if (def->isPredefined()) {
+      if (def->isPredefined() && !def->replacement().empty()) {
         std::string repl;
-        if (!def->replacement().empty()) {
-          std::string name{def->replacement().TokenAt(0).ToString()};
-          if (name == "__FILE__") {
-            repl = "\""s +
-                allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
-          } else if (name == "__LINE__") {
-            std::string buf;
-            llvm::raw_string_ostream ss{buf};
-            ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
-            repl = ss.str();
-          }
+        std::string name{def->replacement().TokenAt(0).ToString()};
+        if (name == "__FILE__") {
+          repl = "\""s +
+              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
+        } else if (name == "__LINE__") {
+          std::string buf;
+          llvm::raw_string_ostream ss{buf};
+          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
+          repl = ss.str();
         }
         if (!repl.empty()) {
           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
@@ -317,105 +347,109 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
           continue;
         }
       }
+      std::optional<std::size_t> partialFLM;
       def->set_isDisabled(true);
-      TokenSequence replaced{
-          TokenPasting(ReplaceMacros(def->replacement(), prescanner))};
+      TokenSequence replaced{TokenPasting(
+          ReplaceMacros(def->replacement(), prescanner, &partialFLM))};
       def->set_isDisabled(false);
-      // Allow a keyword-like macro replacement to be the name of
-      // a function-like macro, possibly surrounded by blanks.
-      std::size_t k{0}, repTokens{replaced.SizeInTokens()};
-      for (; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
+        return result;
+      }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+    } else {
+      // Possible function-like macro call.  Skip spaces and newlines to see
+      // whether '(' is next.
+      std::size_t k{j};
+      bool leftParen{false};
+      while (++k < tokens) {
+        const CharBlock &lookAhead{input.TokenAt(k)};
+        if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
+          leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
+          break;
+        }
       }
-      if (k < repTokens) {
-        token = replaced.TokenAt(k);
-        for (++k; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (!leftParen) {
+        if (partialFunctionLikeMacro) {
+          *partialFunctionLikeMacro = result.SizeInTokens();
+          result.Put(input, j, tokens - j);
+          return result;
+        } else {
+          result.Put(input, j);
+          continue;
         }
-        if (k == repTokens && IsLegalIdentifierStart(token[0])) {
-          auto it{definitions_.find(token)};
-          if (it != definitions_.end() && !it->second.isDisabled() &&
-              it->second.isFunctionLike()) {
-            def = &it->second;
-            isRenaming = true;
+      }
+      std::vector<std::size_t> argStart{++k};
+      for (int nesting{0}; k < tokens; ++k) {
+        CharBlock token{input.TokenAt(k)};
+        char ch{token.OnlyNonBlank()};
+        if (ch == '(') {
+          ++nesting;
+        } else if (ch == ')') {
+          if (nesting == 0) {
+            break;
           }
+          --nesting;
+        } else if (ch == ',' && nesting == 0) {
+          argStart.push_back(k + 1);
         }
       }
-      if (!isRenaming) {
-        if (!replaced.empty()) {
-          ProvenanceRange from{def->replacement().GetProvenanceRange()};
-          ProvenanceRange use{input.GetTokenProvenanceRange(j)};
-          ProvenanceRange newRange{
-              allSources_.AddMacroCall(from, use, replaced.ToString())};
-          result.Put(replaced, newRange);
-        }
+      if (argStart.size() == 1 && k == argStart[0] &&
+          def->argumentCount() == 0) {
+        // Subtle: () is zero arguments, not one empty argument,
+        // unless one argument was expected.
+        argStart.clear();
+      }
+      if (k >= tokens && partialFunctionLikeMacro) {
+        *partialFunctionLikeMacro = result.SizeInTokens();
+        result.Put(input, j, tokens - j);
+        return result;
+      } else if (k >= tokens || argStart.size() < def->argumentCount() ||
+          (argStart.size() > def->argumentCount() && !def->isVariadic())) {
+        result.Put(input, j);
         continue;
       }
-    }
-    // Possible function-like macro call.  Skip spaces and newlines to see
-    // whether '(' is next.
-    std::size_t k{j};
-    bool leftParen{false};
-    while (++k < tokens) {
-      const CharBlock &lookAhead{input.TokenAt(k)};
-      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
-        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
-        break;
+      std::vector<TokenSequence> args;
+      for (std::size_t n{0}; n < argStart.size(); ++n) {
+        std::size_t at{argStart[n]};
+        std::size_t count{
+            (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
+        args.emplace_back(TokenSequence(input, at, count));
       }
-    }
-    if (!leftParen) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<std::size_t> argStart{++k};
-    for (int nesting{0}; k < tokens; ++k) {
-      CharBlock token{input.TokenAt(k)};
-      char ch{token.OnlyNonBlank()};
-      if (ch == '(') {
-        ++nesting;
-      } else if (ch == ')') {
-        if (nesting == 0) {
-          break;
-        }
-        --nesting;
-      } else if (ch == ',' && nesting == 0) {
-        argStart.push_back(k + 1);
+      TokenSequence applied{def->Apply(args, prescanner)};
+      std::optional<std::size_t> partialFLM;
+      def->set_isDisabled(true);
+      TokenSequence replaced{
+          ReplaceMacros(std::move(applied), prescanner, &partialFLM)};
+      def->set_isDisabled(false);
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
+        return result;
       }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+      j = k; // advance to the terminal ')'
     }
-    if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
-      // Subtle: () is zero arguments, not one empty argument,
-      // unless one argument was expected.
-      argStart.clear();
-    }
-    if (k >= tokens || argStart.size() < def->argumentCount() ||
-        (argStart.size() > def->argumentCount() && !def->isVariadic())) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<TokenSequence> args;
-    for (std::size_t n{0}; n < argStart.size(); ++n) {
-      std::size_t at{argStart[n]};
-      std::size_t count{
-          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
-      args.emplace_back(TokenSequence(input, at, count));
-    }
-    TokenSequence applied{def->Apply(args, prescanner)};
-    def->set_isDisabled(true);
-    TokenSequence replaced{ReplaceMacros(std::move(applied), prescanner)};
-    def->set_isDisabled(false);
-    if (!replaced.empty()) {
-      ProvenanceRange from{def->replacement().GetProvenanceRange()};
-      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
-      ProvenanceRange newRange{
-          allSources_.AddMacroCall(from, use, replaced.ToString())};
-      result.Put(replaced, newRange);
-    }
-    j = k; // advance to the terminal ')'
   }
   return result;
 }
 
-TokenSequence Preprocessor::ReplaceMacros(
-    const TokenSequence &tokens, Prescanner &prescanner) {
-  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
+TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
+    Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
+  if (std::optional<TokenSequence> repl{
+          MacroReplacement(tokens, prescanner, partialFunctionLikeMacro)}) {
     return std::move(*repl);
   }
   return tokens;

diff  --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index e0617a490957574..3b456364944c3d6 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -75,8 +75,16 @@ class Preprocessor {
   bool IsNameDefined(const CharBlock &);
   bool IsFunctionLikeDefinition(const CharBlock &);
 
-  std::optional<TokenSequence> MacroReplacement(
-      const TokenSequence &, Prescanner &);
+  // When called with partialFunctionLikeMacro not null, MacroReplacement()
+  // and ReplaceMacros() handle an unclosed function-like macro (FLM)
+  // reference by terminating macro replacement at the name of the FLM and
+  // storing its token index in the result into *partialFunctionLikeMacro.
+  // This allows the recursive call sites in MacroReplacement to append any
+  // remaining tokens in their inputs to that result and try again.  All
+  // other Fortran preprocessors share this behavior.
+  std::optional<TokenSequence> MacroReplacement(const TokenSequence &,
+      Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
 
   // Implements a preprocessor directive.
   void Directive(const TokenSequence &, Prescanner &);
@@ -86,7 +94,8 @@ class Preprocessor {
   enum class CanDeadElseAppear { No, Yes };
 
   CharBlock SaveTokenAsName(const CharBlock &);
-  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &);
+  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner &, ProvenanceRange);
   bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,

diff  --git a/flang/test/Preprocessing/unclosed-FLM.F90 b/flang/test/Preprocessing/unclosed-FLM.F90
new file mode 100644
index 000000000000000..ed8bdbed2f44e62
--- /dev/null
+++ b/flang/test/Preprocessing/unclosed-FLM.F90
@@ -0,0 +1,7 @@
+! RUN: %flang -E %s | FileCheck %s
+#define A B(c)
+#define B(d) d); call E(d
+#define E(f) G(f)
+!CHECK: call I(c); call G(c)
+call I(A)
+end


        


More information about the flang-commits mailing list