[flang-commits] [flang] [flang] Handle preprocessor macro expansion edge case (PR #73835)

via flang-commits flang-commits at lists.llvm.org
Wed Nov 29 10:10:50 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-parser

Author: Peter Klausler (klausler)

<details>
<summary>Changes</summary>

When a reference to a function-like macro begins during the rescanning of another macro's expansion but is not completed by the end of that expansion, it is necessary to abort the rescanning of that expansion and try again once more tokens can be acquired.  (See the new unclosed-FLM.F90 test case.) All other Fortran preprocessors to which I have access can handle this situation.

---
Full diff: https://github.com/llvm/llvm-project/pull/73835.diff


3 Files Affected:

- (modified) flang/lib/Parser/preprocessor.cpp (+133-99) 
- (modified) flang/lib/Parser/preprocessor.h (+12-3) 
- (added) flang/test/Preprocessing/unclosed-FLM.F90 (+7) 


``````````diff
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 88efcf71445c879..8c993e7ced0e8c8 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -259,14 +259,15 @@ void Preprocessor::Define(std::string macro, std::string value) {
 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
 
 std::optional<TokenSequence> Preprocessor::MacroReplacement(
-    const TokenSequence &input, Prescanner &prescanner) {
+    const TokenSequence &input, Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
   // Do quick scan for any use of a defined name.
   if (definitions_.empty()) {
     return std::nullopt;
   }
   std::size_t tokens{input.SizeInTokens()};
-  std::size_t j;
-  for (j = 0; j < tokens; ++j) {
+  std::size_t j{0};
+  for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
         IsNameDefined(token)) {
@@ -277,6 +278,38 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     return std::nullopt; // input contains nothing that would be replaced
   }
   TokenSequence result{input, 0, j};
+
+  // After rescanning after macro replacement has failed due to an unclosed
+  // function-like macro call (no left parenthesis yet, or no closing
+  // parenthesis), if tokens remain in the input, append them to the
+  // replacement text and attempt to proceed.  Otherwise, return, so that
+  // the caller may try again with remaining tokens in its input.
+  auto CompleteFunctionLikeMacro{
+      [this, &input, &prescanner, &result, &partialFunctionLikeMacro](
+          std::size_t after, const TokenSequence &replacement,
+          std::size_t pFLMOffset) {
+        if (after < input.SizeInTokens()) {
+          result.Put(replacement, 0, pFLMOffset);
+          TokenSequence suffix;
+          suffix.Put(
+              replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
+          suffix.Put(input, after, input.SizeInTokens() - after);
+          auto further{
+              ReplaceMacros(suffix, prescanner, partialFunctionLikeMacro)};
+          if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
+            // still not closed
+            **partialFunctionLikeMacro += result.SizeInTokens();
+          }
+          result.Put(further);
+          return true;
+        } else {
+          if (partialFunctionLikeMacro) {
+            *partialFunctionLikeMacro = pFLMOffset + result.SizeInTokens();
+          }
+          return false;
+        }
+      }};
+
   for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
@@ -294,20 +327,17 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
       continue;
     }
     if (!def->isFunctionLike()) {
-      bool isRenaming{false};
-      if (def->isPredefined()) {
+      if (def->isPredefined() && !def->replacement().empty()) {
         std::string repl;
-        if (!def->replacement().empty()) {
-          std::string name{def->replacement().TokenAt(0).ToString()};
-          if (name == "__FILE__") {
-            repl = "\""s +
-                allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
-          } else if (name == "__LINE__") {
-            std::string buf;
-            llvm::raw_string_ostream ss{buf};
-            ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
-            repl = ss.str();
-          }
+        std::string name{def->replacement().TokenAt(0).ToString()};
+        if (name == "__FILE__") {
+          repl = "\""s +
+              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
+        } else if (name == "__LINE__") {
+          std::string buf;
+          llvm::raw_string_ostream ss{buf};
+          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
+          repl = ss.str();
         }
         if (!repl.empty()) {
           ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
@@ -317,105 +347,109 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
           continue;
         }
       }
+      std::optional<std::size_t> partialFLM;
       def->set_isDisabled(true);
-      TokenSequence replaced{
-          TokenPasting(ReplaceMacros(def->replacement(), prescanner))};
+      TokenSequence replaced{TokenPasting(
+          ReplaceMacros(def->replacement(), prescanner, &partialFLM))};
       def->set_isDisabled(false);
-      // Allow a keyword-like macro replacement to be the name of
-      // a function-like macro, possibly surrounded by blanks.
-      std::size_t k{0}, repTokens{replaced.SizeInTokens()};
-      for (; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
+        return result;
+      }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+    } else {
+      // Possible function-like macro call.  Skip spaces and newlines to see
+      // whether '(' is next.
+      std::size_t k{j};
+      bool leftParen{false};
+      while (++k < tokens) {
+        const CharBlock &lookAhead{input.TokenAt(k)};
+        if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
+          leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
+          break;
+        }
       }
-      if (k < repTokens) {
-        token = replaced.TokenAt(k);
-        for (++k; k < repTokens && replaced.TokenAt(k).IsBlank(); ++k) {
+      if (!leftParen) {
+        if (partialFunctionLikeMacro) {
+          *partialFunctionLikeMacro = result.SizeInTokens();
+          result.Put(input, j, tokens - j);
+          return result;
+        } else {
+          result.Put(input, j);
+          continue;
         }
-        if (k == repTokens && IsLegalIdentifierStart(token[0])) {
-          auto it{definitions_.find(token)};
-          if (it != definitions_.end() && !it->second.isDisabled() &&
-              it->second.isFunctionLike()) {
-            def = &it->second;
-            isRenaming = true;
+      }
+      std::vector<std::size_t> argStart{++k};
+      for (int nesting{0}; k < tokens; ++k) {
+        CharBlock token{input.TokenAt(k)};
+        char ch{token.OnlyNonBlank()};
+        if (ch == '(') {
+          ++nesting;
+        } else if (ch == ')') {
+          if (nesting == 0) {
+            break;
           }
+          --nesting;
+        } else if (ch == ',' && nesting == 0) {
+          argStart.push_back(k + 1);
         }
       }
-      if (!isRenaming) {
-        if (!replaced.empty()) {
-          ProvenanceRange from{def->replacement().GetProvenanceRange()};
-          ProvenanceRange use{input.GetTokenProvenanceRange(j)};
-          ProvenanceRange newRange{
-              allSources_.AddMacroCall(from, use, replaced.ToString())};
-          result.Put(replaced, newRange);
-        }
+      if (argStart.size() == 1 && k == argStart[0] &&
+          def->argumentCount() == 0) {
+        // Subtle: () is zero arguments, not one empty argument,
+        // unless one argument was expected.
+        argStart.clear();
+      }
+      if (k >= tokens && partialFunctionLikeMacro) {
+        *partialFunctionLikeMacro = result.SizeInTokens();
+        result.Put(input, j, tokens - j);
+        return result;
+      } else if (k >= tokens || argStart.size() < def->argumentCount() ||
+          (argStart.size() > def->argumentCount() && !def->isVariadic())) {
+        result.Put(input, j);
         continue;
       }
-    }
-    // Possible function-like macro call.  Skip spaces and newlines to see
-    // whether '(' is next.
-    std::size_t k{j};
-    bool leftParen{false};
-    while (++k < tokens) {
-      const CharBlock &lookAhead{input.TokenAt(k)};
-      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
-        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
-        break;
+      std::vector<TokenSequence> args;
+      for (std::size_t n{0}; n < argStart.size(); ++n) {
+        std::size_t at{argStart[n]};
+        std::size_t count{
+            (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
+        args.emplace_back(TokenSequence(input, at, count));
       }
-    }
-    if (!leftParen) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<std::size_t> argStart{++k};
-    for (int nesting{0}; k < tokens; ++k) {
-      CharBlock token{input.TokenAt(k)};
-      char ch{token.OnlyNonBlank()};
-      if (ch == '(') {
-        ++nesting;
-      } else if (ch == ')') {
-        if (nesting == 0) {
-          break;
-        }
-        --nesting;
-      } else if (ch == ',' && nesting == 0) {
-        argStart.push_back(k + 1);
+      TokenSequence applied{def->Apply(args, prescanner)};
+      std::optional<std::size_t> partialFLM;
+      def->set_isDisabled(true);
+      TokenSequence replaced{
+          ReplaceMacros(std::move(applied), prescanner, &partialFLM)};
+      def->set_isDisabled(false);
+      if (partialFLM &&
+          CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
+        return result;
       }
+      if (!replaced.empty()) {
+        ProvenanceRange from{def->replacement().GetProvenanceRange()};
+        ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
+        ProvenanceRange newRange{
+            allSources_.AddMacroCall(from, use, replaced.ToString())};
+        result.Put(replaced, newRange);
+      }
+      j = k; // advance to the terminal ')'
     }
-    if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
-      // Subtle: () is zero arguments, not one empty argument,
-      // unless one argument was expected.
-      argStart.clear();
-    }
-    if (k >= tokens || argStart.size() < def->argumentCount() ||
-        (argStart.size() > def->argumentCount() && !def->isVariadic())) {
-      result.Put(input, j);
-      continue;
-    }
-    std::vector<TokenSequence> args;
-    for (std::size_t n{0}; n < argStart.size(); ++n) {
-      std::size_t at{argStart[n]};
-      std::size_t count{
-          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
-      args.emplace_back(TokenSequence(input, at, count));
-    }
-    TokenSequence applied{def->Apply(args, prescanner)};
-    def->set_isDisabled(true);
-    TokenSequence replaced{ReplaceMacros(std::move(applied), prescanner)};
-    def->set_isDisabled(false);
-    if (!replaced.empty()) {
-      ProvenanceRange from{def->replacement().GetProvenanceRange()};
-      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
-      ProvenanceRange newRange{
-          allSources_.AddMacroCall(from, use, replaced.ToString())};
-      result.Put(replaced, newRange);
-    }
-    j = k; // advance to the terminal ')'
   }
   return result;
 }
 
-TokenSequence Preprocessor::ReplaceMacros(
-    const TokenSequence &tokens, Prescanner &prescanner) {
-  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
+TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
+    Prescanner &prescanner,
+    std::optional<std::size_t> *partialFunctionLikeMacro) {
+  if (std::optional<TokenSequence> repl{
+          MacroReplacement(tokens, prescanner, partialFunctionLikeMacro)}) {
     return std::move(*repl);
   }
   return tokens;
diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index e0617a490957574..3b456364944c3d6 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -75,8 +75,16 @@ class Preprocessor {
   bool IsNameDefined(const CharBlock &);
   bool IsFunctionLikeDefinition(const CharBlock &);
 
-  std::optional<TokenSequence> MacroReplacement(
-      const TokenSequence &, Prescanner &);
+  // When called with partialFunctionLikeMacro not null, MacroReplacement()
+  // and ReplaceMacros() handle an unclosed function-like macro reference
+  // by terminating macro replacement at the name of the FLM and returning
+  // its index in the result.  This allows the recursive call sites in
+  // MacroReplacement to append any remaining tokens in their inputs to
+  // that result and try again.  All other Fortran preprocessors share this
+  // behavior.
+  std::optional<TokenSequence> MacroReplacement(const TokenSequence &,
+      Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
 
   // Implements a preprocessor directive.
   void Directive(const TokenSequence &, Prescanner &);
@@ -86,7 +94,8 @@ class Preprocessor {
   enum class CanDeadElseAppear { No, Yes };
 
   CharBlock SaveTokenAsName(const CharBlock &);
-  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &);
+  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &,
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner &, ProvenanceRange);
   bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
diff --git a/flang/test/Preprocessing/unclosed-FLM.F90 b/flang/test/Preprocessing/unclosed-FLM.F90
new file mode 100644
index 000000000000000..ed8bdbed2f44e62
--- /dev/null
+++ b/flang/test/Preprocessing/unclosed-FLM.F90
@@ -0,0 +1,7 @@
+! RUN: %flang -E %s | FileCheck %s
+#define A B(c)
+#define B(d) d); call E(d
+#define E(f) G(f)
+!CHECK: call I(c); call G(c)
+call I(A)
+end

``````````

</details>


https://github.com/llvm/llvm-project/pull/73835


More information about the flang-commits mailing list