[flang-commits] [flang] [flang] Accept directive sentinels in macro-replaced source better (PR #70699)

via flang-commits flang-commits at lists.llvm.org
Mon Oct 30 11:27:31 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-parser

Author: Peter Klausler (klausler)

<details>
<summary>Changes</summary>

At present, the prescanner emits an error if a source line or compiler directive, after macro replacement or not, contains a token with a non-Fortran character.  In the particular case of the '!' character, the code that checks for bad character will accept the '!' if it appears after a ';', since the '!' might begin a compiler directive.

This current implementation fails when a compiler directive appears after some other character that might (by means of further source processing not visible to the prescanner) be replaced with a ';' or newline.

Extend the bad character check for '!' to actually check for a compiler directive sentinel instead.

---
Full diff: https://github.com/llvm/llvm-project/pull/70699.diff


5 Files Affected:

- (modified) flang/lib/Parser/prescan.cpp (+19-4) 
- (modified) flang/lib/Parser/prescan.h (+2) 
- (modified) flang/lib/Parser/token-sequence.cpp (+12-10) 
- (modified) flang/lib/Parser/token-sequence.h (+2-1) 
- (added) flang/test/Preprocessing/preprocessed-dirs.F90 (+8) 


``````````diff
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 2f25b02bf7a323d..c9da6534ed88ba8 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -205,7 +205,7 @@ void Prescanner::Statement() {
       Say(preprocessed->GetProvenanceRange(),
           "Preprocessed line resembles a preprocessor directive"_warn_en_US);
       preprocessed->ToLowerCase()
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -217,7 +217,7 @@ void Prescanner::Statement() {
       preprocessed->ToLowerCase();
       SourceFormChange(preprocessed->ToString());
       preprocessed->ClipComment(*this, true /* skip first ! */)
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -233,7 +233,7 @@ void Prescanner::Statement() {
       }
       preprocessed->ToLowerCase()
           .ClipComment(*this)
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -246,7 +246,7 @@ void Prescanner::Statement() {
     if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
       EnforceStupidEndStatementRules(tokens);
     }
-    tokens.CheckBadFortranCharacters(messages_)
+    tokens.CheckBadFortranCharacters(messages_, *this)
         .CheckBadParentheses(messages_)
         .Emit(cooked_);
   }
@@ -1266,6 +1266,21 @@ const char *Prescanner::IsCompilerDirectiveSentinel(
   return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
 }
 
+const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
+  const char *p{token.begin()};
+  const char *end{p + token.size()};
+  while (p < end && (*p == ' ' || *p == '\n')) {
+    ++p;
+  }
+  if (p < end && *p == '!') {
+    ++p;
+  }
+  while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
+    --end;
+  }
+  return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
+}
+
 constexpr bool IsDirective(const char *match, const char *dir) {
   for (; *match; ++match) {
     if (*match != ToLowerCaseLetter(*dir++)) {
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 021632657a98c13..276fa19a4b1c64c 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -68,7 +68,9 @@ class Prescanner {
   bool IsNextLinePreprocessorDirective() const;
   TokenSequence TokenizePreprocessorDirective();
   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
+
   const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
+  const char *IsCompilerDirectiveSentinel(CharBlock) const;
 
   template <typename... A> Message &Say(A &&...a) {
     return messages_.Say(std::forward<A>(a)...);
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 139d2e1ba811d63..c5a630c471d16ea 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -343,16 +343,23 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const {
 }
 
 const TokenSequence &TokenSequence::CheckBadFortranCharacters(
-    Messages &messages) const {
+    Messages &messages, const Prescanner &prescanner) const {
   std::size_t tokens{SizeInTokens()};
-  bool isBangOk{true};
   for (std::size_t j{0}; j < tokens; ++j) {
     CharBlock token{TokenAt(j)};
     char ch{token.FirstNonBlank()};
     if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
-      if (ch == '!' && isBangOk) {
-        // allow in !dir$
-      } else if (ch < ' ' || ch >= '\x7f') {
+      if (ch == '!') {
+        if (prescanner.IsCompilerDirectiveSentinel(token)) {
+          continue;
+        } else if (j + 1 < tokens &&
+            prescanner.IsCompilerDirectiveSentinel(
+                TokenAt(j + 1))) { // !dir$, &c.
+          ++j;
+          continue;
+        }
+      }
+      if (ch < ' ' || ch >= '\x7f') {
         messages.Say(GetTokenProvenanceRange(j),
             "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
       } else {
@@ -360,11 +367,6 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
             "bad character ('%c') in Fortran token"_err_en_US, ch);
       }
     }
-    if (ch == ';') {
-      isBangOk = true;
-    } else if (ch != ' ') {
-      isBangOk = false;
-    }
   }
   return *this;
 }
diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index 6b9e1f87ee01609..3df403d41e636f9 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -123,7 +123,8 @@ class TokenSequence {
   TokenSequence &RemoveBlanks(std::size_t firstChar = 0);
   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
   TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
-  const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+  const TokenSequence &CheckBadFortranCharacters(
+      Messages &, const Prescanner &) const;
   const TokenSequence &CheckBadParentheses(Messages &) const;
   void Emit(CookedSource &) const;
   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
diff --git a/flang/test/Preprocessing/preprocessed-dirs.F90 b/flang/test/Preprocessing/preprocessed-dirs.F90
new file mode 100644
index 000000000000000..8ac769fdfb61da9
--- /dev/null
+++ b/flang/test/Preprocessing/preprocessed-dirs.F90
@@ -0,0 +1,8 @@
+! RUN: %flang -fc1 -E -fopenacc %s 2>&1 | FileCheck %s
+!CHECK: subroutine r4(x) Z real :: x Z !$acc routine Z print *, x Z end
+#define SUB(s, t) subroutine s(x) Z\
+  t :: x Z\
+  !$acc routine Z\
+  print *, x Z\
+  end subroutine s
+SUB(r4, real)

``````````

</details>


https://github.com/llvm/llvm-project/pull/70699


More information about the flang-commits mailing list