[flang-commits] [flang] [flang] Handle "defined" in macro expansions (PR #114844)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Mon Nov 4 10:05:02 PST 2024


https://github.com/klausler created https://github.com/llvm/llvm-project/pull/114844

The preprocessor implements "defined(X)" and "defined X" in if/elif directive expressions in such a way that they only work at the top level, not when they appear in macro expansions. Fix that, which is a little tricky due to the need to detect the "defined" keyword before applying any macro expansion to its argument, and add a bunch of tests.

Fixes https://github.com/llvm/llvm-project/issues/114064.

>From ed5e51e3a24c795b7b8a4562e0cde1a4f2f8203b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Mon, 4 Nov 2024 10:00:12 -0800
Subject: [PATCH] [flang] Handle "defined" in macro expansions

The preprocessor implements "defined(X)" and "defined X" in
if/elif directive expressions in such a way that they only
work at the top level, not when they appear in macro expansions.
Fix that, which is a little tricky due to the need to detect
the "defined" keyword before applying any macro expansion to
its argument, and add a bunch of tests.

Fixes https://github.com/llvm/llvm-project/issues/114064.
---
 flang/include/flang/Parser/preprocessor.h     |   9 +-
 flang/include/flang/Parser/token-sequence.h   |   1 +
 flang/lib/Parser/preprocessor.cpp             | 116 +++++++++--------
 flang/lib/Parser/token-sequence.cpp           |  10 ++
 flang/test/Preprocessing/defined-in-macro.F90 | 117 ++++++++++++++++++
 5 files changed, 195 insertions(+), 58 deletions(-)
 create mode 100644 flang/test/Preprocessing/defined-in-macro.F90

diff --git a/flang/include/flang/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h
index 57690dd226f628..f8d33460659814 100644
--- a/flang/include/flang/Parser/preprocessor.h
+++ b/flang/include/flang/Parser/preprocessor.h
@@ -48,7 +48,8 @@ class Definition {
 
   bool set_isDisabled(bool disable);
 
-  TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &);
+  TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &,
+      bool inIfExpression = false);
 
   void Print(llvm::raw_ostream &out, const char *macroName = "") const;
 
@@ -93,7 +94,8 @@ class Preprocessor {
   // behavior.
   std::optional<TokenSequence> MacroReplacement(const TokenSequence &,
       Prescanner &,
-      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr,
+      bool inIfExpression = false);
 
   // Implements a preprocessor directive.
   void Directive(const TokenSequence &, Prescanner &);
@@ -106,7 +108,8 @@ class Preprocessor {
 
   CharBlock SaveTokenAsName(const CharBlock &);
   TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &,
-      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr);
+      std::optional<std::size_t> *partialFunctionLikeMacro = nullptr,
+      bool inIfExpression = false);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner &, ProvenanceRange);
   bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
diff --git a/flang/include/flang/Parser/token-sequence.h b/flang/include/flang/Parser/token-sequence.h
index 1f82a3c1a203ac..047c0bed00762a 100644
--- a/flang/include/flang/Parser/token-sequence.h
+++ b/flang/include/flang/Parser/token-sequence.h
@@ -79,6 +79,7 @@ class TokenSequence {
   }
 
   std::size_t SkipBlanks(std::size_t) const;
+  std::optional<std::size_t> SkipBlanksBackwards(std::size_t) const;
 
   // True if anything remains in the sequence at & after the given offset
   // except blanks and line-ending C++ and Fortran free-form comments.
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index f9b8716941b3bc..6c257f57c2d57c 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -177,8 +177,13 @@ static TokenSequence TokenPasting(TokenSequence &&text) {
   return result;
 }
 
-TokenSequence Definition::Apply(
-    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
+constexpr bool IsDefinedKeyword(CharBlock token) {
+  return token.size() == 7 && (token[0] == 'd' || token[0] == 'D') &&
+      ToLowerCaseLetters(token.ToString()) == "defined";
+}
+
+TokenSequence Definition::Apply(const std::vector<TokenSequence> &args,
+    Prescanner &prescanner, bool inIfExpression) {
   TokenSequence result;
   bool skipping{false};
   int parenthesesNesting{0};
@@ -223,13 +228,16 @@ TokenSequence Definition::Apply(
         const TokenSequence *arg{&args[index]};
         std::optional<TokenSequence> replaced;
         // Don't replace macros in the actual argument if it is preceded or
-        // followed by the token-pasting operator ## in the replacement text.
-        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
+        // followed by the token-pasting operator ## in the replacement text,
+        // or if we have to worry about "defined(X)"/"defined X" in an
+        // #if/#elif expression.
+        if (!inIfExpression &&
+            (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1)))) {
           auto next{replacement_.SkipBlanks(j + 1)};
           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
             // Apply macro replacement to the actual argument
-            replaced =
-                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
+            replaced = prescanner.preprocessor().MacroReplacement(
+                *arg, prescanner, nullptr, inIfExpression);
             if (replaced) {
               arg = &*replaced;
             }
@@ -301,7 +309,7 @@ void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
 
 std::optional<TokenSequence> Preprocessor::MacroReplacement(
     const TokenSequence &input, Prescanner &prescanner,
-    std::optional<std::size_t> *partialFunctionLikeMacro) {
+    std::optional<std::size_t> *partialFunctionLikeMacro, bool inIfExpression) {
   // Do quick scan for any use of a defined name.
   if (definitions_.empty()) {
     return std::nullopt;
@@ -311,7 +319,7 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
   for (; j < tokens; ++j) {
     CharBlock token{input.TokenAt(j)};
     if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
-        IsNameDefined(token)) {
+        (IsNameDefined(token) || (inIfExpression && IsDefinedKeyword(token)))) {
       break;
     }
   }
@@ -326,8 +334,8 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
   // replacement text and attempt to proceed.  Otherwise, return, so that
   // the caller may try again with remaining tokens in its input.
   auto CompleteFunctionLikeMacro{
-      [this, &input, &prescanner, &result, &partialFunctionLikeMacro](
-          std::size_t after, const TokenSequence &replacement,
+      [this, &input, &prescanner, &result, &partialFunctionLikeMacro,
+          inIfExpression](std::size_t after, const TokenSequence &replacement,
           std::size_t pFLMOffset) {
         if (after < input.SizeInTokens()) {
           result.Put(replacement, 0, pFLMOffset);
@@ -335,8 +343,8 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
           suffix.Put(
               replacement, pFLMOffset, replacement.SizeInTokens() - pFLMOffset);
           suffix.Put(input, after, input.SizeInTokens() - after);
-          auto further{
-              ReplaceMacros(suffix, prescanner, partialFunctionLikeMacro)};
+          auto further{ReplaceMacros(
+              suffix, prescanner, partialFunctionLikeMacro, inIfExpression)};
           if (partialFunctionLikeMacro && *partialFunctionLikeMacro) {
             // still not closed
             **partialFunctionLikeMacro += result.SizeInTokens();
@@ -357,7 +365,28 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
       result.Put(input, j);
       continue;
     }
+    // Process identifier in replacement text.
     auto it{definitions_.find(token)};
+    // Is in the X in "defined(X)" or "defined X" in an #if/#elif expression?
+    if (inIfExpression) {
+      if (auto prev{result.SkipBlanksBackwards(result.SizeInTokens())}) {
+        bool ok{true};
+        std::optional<std::size_t> rightParenthesis;
+        if (result.TokenAt(*prev).OnlyNonBlank() == '(') {
+          prev = result.SkipBlanksBackwards(*prev);
+          rightParenthesis = input.SkipBlanks(j + 1);
+          ok = *rightParenthesis < tokens &&
+              input.TokenAt(*rightParenthesis).OnlyNonBlank() == ')';
+        }
+        if (ok && prev && IsDefinedKeyword(result.TokenAt(*prev))) {
+          result = TokenSequence{result, 0, *prev}; // trims off "defined ("
+          char truth{it != definitions_.end() ? '1' : '0'};
+          result.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
+          j = rightParenthesis.value_or(j);
+          continue;
+        }
+      }
+    }
     if (it == definitions_.end()) {
       result.Put(input, j);
       continue;
@@ -403,8 +432,8 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
       }
       std::optional<std::size_t> partialFLM;
       def->set_isDisabled(true);
-      TokenSequence replaced{TokenPasting(
-          ReplaceMacros(def->replacement(), prescanner, &partialFLM))};
+      TokenSequence replaced{TokenPasting(ReplaceMacros(
+          def->replacement(), prescanner, &partialFLM, inIfExpression))};
       def->set_isDisabled(false);
       if (partialFLM &&
           CompleteFunctionLikeMacro(j + 1, replaced, *partialFLM)) {
@@ -476,11 +505,11 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
             (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
         args.emplace_back(TokenSequence(input, at, count));
       }
-      TokenSequence applied{def->Apply(args, prescanner)};
+      TokenSequence applied{def->Apply(args, prescanner, inIfExpression)};
       std::optional<std::size_t> partialFLM;
       def->set_isDisabled(true);
-      TokenSequence replaced{
-          ReplaceMacros(std::move(applied), prescanner, &partialFLM)};
+      TokenSequence replaced{ReplaceMacros(
+          std::move(applied), prescanner, &partialFLM, inIfExpression)};
       def->set_isDisabled(false);
       if (partialFLM &&
           CompleteFunctionLikeMacro(k + 1, replaced, *partialFLM)) {
@@ -501,9 +530,9 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
 
 TokenSequence Preprocessor::ReplaceMacros(const TokenSequence &tokens,
     Prescanner &prescanner,
-    std::optional<std::size_t> *partialFunctionLikeMacro) {
-  if (std::optional<TokenSequence> repl{
-          MacroReplacement(tokens, prescanner, partialFunctionLikeMacro)}) {
+    std::optional<std::size_t> *partialFunctionLikeMacro, bool inIfExpression) {
+  if (std::optional<TokenSequence> repl{MacroReplacement(
+          tokens, prescanner, partialFunctionLikeMacro, inIfExpression)}) {
     return std::move(*repl);
   }
   return tokens;
@@ -1215,50 +1244,27 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
   return left;
 }
 
-bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
+bool Preprocessor::IsIfPredicateTrue(const TokenSequence &directive,
     std::size_t first, std::size_t exprTokens, Prescanner &prescanner) {
-  TokenSequence expr1{expr, first, exprTokens};
-  if (expr1.HasBlanks()) {
-    expr1.RemoveBlanks();
-  }
-  TokenSequence expr2;
-  for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
-    if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
-      CharBlock name;
-      if (j + 3 < expr1.SizeInTokens() &&
-          expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
-          expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
-        name = expr1.TokenAt(j + 2);
-        j += 3;
-      } else if (j + 1 < expr1.SizeInTokens() &&
-          IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
-        name = expr1.TokenAt(++j);
-      }
-      if (!name.empty()) {
-        char truth{IsNameDefined(name) ? '1' : '0'};
-        expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
-        continue;
-      }
-    }
-    expr2.Put(expr1, j);
-  }
-  TokenSequence expr3{ReplaceMacros(expr2, prescanner)};
-  if (expr3.HasBlanks()) {
-    expr3.RemoveBlanks();
+  TokenSequence expr{directive, first, exprTokens};
+  TokenSequence replaced{
+      ReplaceMacros(expr, prescanner, nullptr, /*inIfExpression=*/true)};
+  if (replaced.HasBlanks()) {
+    replaced.RemoveBlanks();
   }
-  if (expr3.empty()) {
+  if (replaced.empty()) {
     prescanner.Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
     return false;
   }
   std::size_t atToken{0};
   std::optional<Message> error;
-  bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
+  bool result{ExpressionValue(replaced, 0, &atToken, &error) != 0};
   if (error) {
     prescanner.Say(std::move(*error));
-  } else if (atToken < expr3.SizeInTokens() &&
-      expr3.TokenAt(atToken).ToString() != "!") {
-    prescanner.Say(expr3.GetIntervalProvenanceRange(
-                       atToken, expr3.SizeInTokens() - atToken),
+  } else if (atToken < replaced.SizeInTokens() &&
+      replaced.TokenAt(atToken).ToString() != "!") {
+    prescanner.Say(replaced.GetIntervalProvenanceRange(
+                       atToken, replaced.SizeInTokens() - atToken),
         atToken == 0 ? "could not parse any expression"_err_en_US
                      : "excess characters after expression"_err_en_US);
   }
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index c73bca1df33de3..cdbe89b1eb441c 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -61,6 +61,16 @@ std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
   return tokens; // even if at > tokens
 }
 
+std::optional<std::size_t> TokenSequence::SkipBlanksBackwards(
+    std::size_t at) const {
+  while (at-- > 0) {
+    if (!TokenAt(at).IsBlank()) {
+      return at;
+    }
+  }
+  return std::nullopt;
+}
+
 // C-style /*comments*/ are removed from preprocessing directive
 // token sequences by the prescanner, but not C++ or Fortran
 // free-form line-ending comments (//...  and !...) because
diff --git a/flang/test/Preprocessing/defined-in-macro.F90 b/flang/test/Preprocessing/defined-in-macro.F90
new file mode 100644
index 00000000000000..9416b9c81b5523
--- /dev/null
+++ b/flang/test/Preprocessing/defined-in-macro.F90
@@ -0,0 +1,117 @@
+! RUN: %flang -E %s 2>&1 | FileCheck %s
+
+! CHECK: print *, 'pass 1'
+#define IS_DEFINED
+#define M1 defined(IS_DEFINED)
+#if M1
+print *, 'pass 1'
+#else
+print *, 'fail 1'
+#endif
+
+! CHECK: print *, 'pass 2'
+#define M2 defined IS_DEFINED
+#if M2
+print *, 'pass 2'
+#else
+print *, 'fail 2'
+#endif
+
+! CHECK: print *, 'pass 3'
+#define M3 defined(IS_UNDEFINED)
+#if M3
+print *, 'fail 3'
+#else
+print *, 'pass 3'
+#endif
+
+! CHECK: print *, 'pass 4'
+#define M4 defined IS_UNDEFINED
+#if M4
+print *, 'fail 4'
+#else
+print *, 'pass 4'
+#endif
+
+! CHECK: print *, 'pass 5'
+#define DEFINED_KEYWORD defined
+#define M5(x) DEFINED_KEYWORD(x)
+#define KWM1 1
+#if M5(KWM1)
+print *, 'pass 5'
+#else
+print *, 'fail 5'
+#endif
+
+! CHECK: print *, 'pass 6'
+#define KWM2 KWM1
+#if M5(KWM2)
+print *, 'pass 6'
+#else
+print *, 'fail 6'
+#endif
+
+! CHECK: print *, 'pass 7'
+#if M5(IS_UNDEFINED)
+print *, 'fail 7'
+#else
+print *, 'pass 7'
+#endif
+
+! CHECK: print *, 'pass 8'
+#define KWM3 IS_UNDEFINED
+#if M5(KWM3)
+print *, 'pass 8'
+#else
+print *, 'fail 8'
+#endif
+
+! CHECK: print *, 'pass 9'
+#define M6(x) defined(x)
+#if M6(KWM1)
+print *, 'pass 9'
+#else
+print *, 'fail 9'
+#endif
+
+! CHECK: print *, 'pass 10'
+#if M6(KWM2)
+print *, 'pass 10'
+#else
+print *, 'fail 10'
+#endif
+
+! CHECK: print *, 'pass 11'
+#if M6(IS_UNDEFINED)
+print *, 'fail 11'
+#else
+print *, 'pass 11'
+#endif
+
+! CHECK: print *, 'pass 12'
+#if M6(KWM3)
+print *, 'pass 12'
+#else
+print *, 'fail 12'
+#endif
+
+! CHECK: print *, 'pass 13'
+#define M7(A, B) ((A) * 10000 + (B) * 100)
+#define M8(A, B, C, AA, BB) ( \
+  (defined(AA) && defined(BB)) && \
+  (M7(A, B) C M7(AA, BB)))
+#if M8(9, 5, >, BAZ, FUX)
+print *, 'fail 13'
+#else
+print *, 'pass 13'
+#endif
+
+! CHECK: print *, 'pass 14'
+#define M9() (defined(IS_UNDEFINED))
+#if M9()
+print *, 'fail 14'
+#else
+print *, 'pass 14'
+#endif
+
+end



More information about the flang-commits mailing list