[flang-commits] [flang] 297230a - [flang] Accept directive sentinels in macro-replaced source better (#70699)

via flang-commits flang-commits at lists.llvm.org
Tue Oct 31 12:35:05 PDT 2023


Author: Peter Klausler
Date: 2023-10-31T12:35:01-07:00
New Revision: 297230acdadd667fda379debd7dda20ace4f7213

URL: https://github.com/llvm/llvm-project/commit/297230acdadd667fda379debd7dda20ace4f7213
DIFF: https://github.com/llvm/llvm-project/commit/297230acdadd667fda379debd7dda20ace4f7213.diff

LOG: [flang] Accept directive sentinels in macro-replaced source better (#70699)

At present, the prescanner emits an error if a source line or compiler
directive, after macro replacement or not, contains a token with a
non-Fortran character. In the particular case of the '!' character, the
code that checks for bad character will accept the '!' if it appears
after a ';', since the '!' might begin a compiler directive.

This current implementation fails when a compiler directive appears
after some other character that might (by means of further source
processing not visible to the prescanner) be replaced with a ';' or
newline.

Extend the bad character check for '!' to actually check for a compiler
directive sentinel instead.

Added: 
    flang/test/Preprocessing/preprocessed-dirs.F90

Modified: 
    flang/lib/Parser/prescan.cpp
    flang/lib/Parser/prescan.h
    flang/lib/Parser/token-sequence.cpp
    flang/lib/Parser/token-sequence.h

Removed: 
    


################################################################################
diff  --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 5b600822282a443..0408de6208647c9 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -205,7 +205,7 @@ void Prescanner::Statement() {
       Say(preprocessed->GetProvenanceRange(),
           "Preprocessed line resembles a preprocessor directive"_warn_en_US);
       preprocessed->ToLowerCase()
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -220,7 +220,7 @@ void Prescanner::Statement() {
       preprocessed->ToLowerCase();
       SourceFormChange(preprocessed->ToString());
       preprocessed->ClipComment(*this, true /* skip first ! */)
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -239,7 +239,7 @@ void Prescanner::Statement() {
       }
       preprocessed->ToLowerCase()
           .ClipComment(*this)
-          .CheckBadFortranCharacters(messages_)
+          .CheckBadFortranCharacters(messages_, *this)
           .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
@@ -257,7 +257,7 @@ void Prescanner::Statement() {
         EnforceStupidEndStatementRules(tokens);
       }
     }
-    tokens.CheckBadFortranCharacters(messages_)
+    tokens.CheckBadFortranCharacters(messages_, *this)
         .CheckBadParentheses(messages_)
         .Emit(cooked_);
   }
@@ -1277,6 +1277,21 @@ const char *Prescanner::IsCompilerDirectiveSentinel(
   return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
 }
 
+const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
+  const char *p{token.begin()};
+  const char *end{p + token.size()};
+  while (p < end && (*p == ' ' || *p == '\n')) {
+    ++p;
+  }
+  if (p < end && *p == '!') {
+    ++p;
+  }
+  while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
+    --end;
+  }
+  return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
+}
+
 constexpr bool IsDirective(const char *match, const char *dir) {
   for (; *match; ++match) {
     if (*match != ToLowerCaseLetter(*dir++)) {

diff  --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 5f80d26636dc719..16b2c6165f611c7 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -68,7 +68,9 @@ class Prescanner {
   bool IsNextLinePreprocessorDirective() const;
   TokenSequence TokenizePreprocessorDirective();
   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
+
   const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
+  const char *IsCompilerDirectiveSentinel(CharBlock) const;
 
   template <typename... A> Message &Say(A &&...a) {
     return messages_.Say(std::forward<A>(a)...);

diff  --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 139d2e1ba811d63..c5a630c471d16ea 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -343,16 +343,23 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const {
 }
 
 const TokenSequence &TokenSequence::CheckBadFortranCharacters(
-    Messages &messages) const {
+    Messages &messages, const Prescanner &prescanner) const {
   std::size_t tokens{SizeInTokens()};
-  bool isBangOk{true};
   for (std::size_t j{0}; j < tokens; ++j) {
     CharBlock token{TokenAt(j)};
     char ch{token.FirstNonBlank()};
     if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
-      if (ch == '!' && isBangOk) {
-        // allow in !dir$
-      } else if (ch < ' ' || ch >= '\x7f') {
+      if (ch == '!') {
+        if (prescanner.IsCompilerDirectiveSentinel(token)) {
+          continue;
+        } else if (j + 1 < tokens &&
+            prescanner.IsCompilerDirectiveSentinel(
+                TokenAt(j + 1))) { // !dir$, &c.
+          ++j;
+          continue;
+        }
+      }
+      if (ch < ' ' || ch >= '\x7f') {
         messages.Say(GetTokenProvenanceRange(j),
             "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
       } else {
@@ -360,11 +367,6 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
             "bad character ('%c') in Fortran token"_err_en_US, ch);
       }
     }
-    if (ch == ';') {
-      isBangOk = true;
-    } else if (ch != ' ') {
-      isBangOk = false;
-    }
   }
   return *this;
 }

diff  --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index 6b9e1f87ee01609..3df403d41e636f9 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -123,7 +123,8 @@ class TokenSequence {
   TokenSequence &RemoveBlanks(std::size_t firstChar = 0);
   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
   TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
-  const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+  const TokenSequence &CheckBadFortranCharacters(
+      Messages &, const Prescanner &) const;
   const TokenSequence &CheckBadParentheses(Messages &) const;
   void Emit(CookedSource &) const;
   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;

diff  --git a/flang/test/Preprocessing/preprocessed-dirs.F90 b/flang/test/Preprocessing/preprocessed-dirs.F90
new file mode 100644
index 000000000000000..8ac769fdfb61da9
--- /dev/null
+++ b/flang/test/Preprocessing/preprocessed-dirs.F90
@@ -0,0 +1,8 @@
+! RUN: %flang -fc1 -E -fopenacc %s 2>&1 | FileCheck %s
+!CHECK: subroutine r4(x) Z real :: x Z !$acc routine Z print *, x Z end
+#define SUB(s, t) subroutine s(x) Z\
+  t :: x Z\
+  !$acc routine Z\
+  print *, x Z\
+  end subroutine s
+SUB(r4, real)


        


More information about the flang-commits mailing list