[flang-commits] [flang] [flang] Disable Fortran free form line continuation in non-source lin… (PR #94663)

Mon Jun 10 13:50:35 PDT 2024

https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/94663

>From 51940450faa456527668337d7af6ed9f1482a085 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Thu, 6 Jun 2024 12:16:22 -0700
Subject: [PATCH] [flang] Disable Fortran free form line continuation in
 non-source line produced by keyword macro replacement

When later initial keyword macro replacement will yield a line that is not
Fortran source, don't interpret "&" as a Fortran source line continuation
marker during tokenization of the line.

Fixes https://github.com/llvm/llvm-project/issues/82579.
---
 flang/include/flang/Parser/token-sequence.h   |  2 +-
 flang/lib/Parser/prescan.cpp                  | 75 ++++++++++++++-----
 flang/lib/Parser/prescan.h                    |  5 ++
 flang/lib/Parser/token-sequence.cpp           |  5 +-
 .../directive-contin-with-pp.F90              | 63 ++++++++++++----
 5 files changed, 116 insertions(+), 34 deletions(-)

diff --git a/flang/include/flang/Parser/token-sequence.h b/flang/include/flang/Parser/token-sequence.h
index 849240d8ec62c..ee5f71edd03c8 100644
--- a/flang/include/flang/Parser/token-sequence.h
+++ b/flang/include/flang/Parser/token-sequence.h
@@ -124,7 +124,7 @@ class TokenSequence {
   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
   TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
   const TokenSequence &CheckBadFortranCharacters(
-      Messages &, const Prescanner &) const;
+      Messages &, const Prescanner &, bool allowAmpersand) const;
   const TokenSequence &CheckBadParentheses(Messages &) const;
   void Emit(CookedSource &) const;
   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index f9b9c3d2c6e56..e4801c36505b4 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -180,6 +180,28 @@ void Prescanner::Statement() {
       }
     } else {
       SkipSpaces();
+      // Check for a leading identifier that might be a keyword macro
+      // that will expand to anything indicating a non-source line, like
+      // a comment marker or directive sentinel.  If so, disable line
+      // continuation, so that NextToken() won't consume anything from
+      // following lines.
+      if (IsLegalIdentifierStart(*at_)) {
+        CHECK(NextToken(tokens));
+        CHECK(tokens.SizeInTokens() == 1);
+        CharBlock id{tokens.TokenAt(0)};
+        if (preprocessor_.IsNameDefined(id) &&
+            !preprocessor_.IsFunctionLikeDefinition(id)) {
+          if (auto replaced{preprocessor_.MacroReplacement(tokens, *this)}) {
+            auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
+            disableSourceContinuation_ =
+                newLineClass.kind != LineClassification::Kind::Source;
+            if (newLineClass.kind ==
+                LineClassification::Kind::CompilerDirective) {
+              directiveSentinel_ = newLineClass.sentinel;
+            }
+          }
+        }
+      }
     }
     break;
   }
@@ -197,17 +219,13 @@ void Prescanner::Statement() {
   Provenance newlineProvenance{GetCurrentProvenance()};
   if (std::optional<TokenSequence> preprocessed{
           preprocessor_.MacroReplacement(tokens, *this)}) {
-    // Reprocess the preprocessed line.  Append a newline temporarily.
-    preprocessed->PutNextTokenChar('\n', newlineProvenance);
-    preprocessed->CloseToken();
-    const char *ppd{preprocessed->ToCharBlock().begin()};
-    LineClassification ppl{ClassifyLine(ppd)};
-    preprocessed->pop_back(); // remove the newline
+    // Reprocess the preprocessed line.
+    LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
     switch (ppl.kind) {
     case LineClassification::Kind::Comment:
       break;
     case LineClassification::Kind::IncludeLine:
-      FortranInclude(ppd + ppl.payloadOffset);
+      FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
       break;
     case LineClassification::Kind::ConditionalCompilationDirective:
     case LineClassification::Kind::IncludeDirective:
@@ -270,7 +288,8 @@ void Prescanner::Statement() {
 
 void Prescanner::CheckAndEmitLine(
     TokenSequence &tokens, Provenance newlineProvenance) {
-  tokens.CheckBadFortranCharacters(messages_, *this);
+  tokens.CheckBadFortranCharacters(
+      messages_, *this, disableSourceContinuation_);
   // Parenthesis nesting check does not apply while any #include is
   // active, nor on the lines before and after a top-level #include.
   // Applications play shenanigans with line continuation before and
@@ -1243,7 +1262,9 @@ bool Prescanner::IsImplicitContinuation() const {
 }
 
 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
-  if (*at_ == '\n' || *at_ == '&') {
+  if (disableSourceContinuation_) {
+    return false;
+  } else if (*at_ == '\n' || *at_ == '&') {
     if (inFixedForm_) {
       return FixedFormContinuation(mightNeedFixedFormSpace);
     } else {
@@ -1255,8 +1276,9 @@ bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
     BeginSourceLine(nextLine_);
     NextLine();
     return true;
+  } else {
+    return false;
   }
-  return false;
 }
 
 std::optional<Prescanner::LineClassification>
@@ -1418,6 +1440,17 @@ Prescanner::LineClassification Prescanner::ClassifyLine(
   return {LineClassification::Kind::Source};
 }
 
+Prescanner::LineClassification Prescanner::ClassifyLine(
+    TokenSequence &tokens, Provenance newlineProvenance) const {
+  // Append a newline temporarily.
+  tokens.PutNextTokenChar('\n', newlineProvenance);
+  tokens.CloseToken();
+  const char *ppd{tokens.ToCharBlock().begin()};
+  LineClassification classification{ClassifyLine(ppd)};
+  tokens.pop_back(); // remove the newline
+  return classification;
+}
+
 void Prescanner::SourceFormChange(std::string &&dir) {
   if (dir == "!dir$ free") {
     inFixedForm_ = false;
@@ -1445,7 +1478,7 @@ bool Prescanner::CompilerDirectiveContinuation(
     return true;
   }
   CHECK(origSentinel != nullptr);
-  directiveSentinel_ = origSentinel; // so IsDirective() is true
+  directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
   const char *nextContinuation{
       followingLine.kind == LineClassification::Kind::CompilerDirective
           ? FreeFormContinuationLine(true)
@@ -1457,7 +1490,6 @@ bool Prescanner::CompilerDirectiveContinuation(
   auto origNextLine{nextLine_};
   BeginSourceLine(nextLine_);
   NextLine();
-  TokenSequence followingTokens;
   if (nextContinuation) {
     // What follows is !DIR$ & xxx; skip over the & so that it
     // doesn't cause a spurious continuation.
@@ -1467,6 +1499,7 @@ bool Prescanner::CompilerDirectiveContinuation(
     // but might become a directive continuation afterwards.
     SkipSpaces();
   }
+  TokenSequence followingTokens;
   while (NextToken(followingTokens)) {
   }
   if (auto followingPrepro{
@@ -1475,25 +1508,31 @@ bool Prescanner::CompilerDirectiveContinuation(
   }
   followingTokens.RemoveRedundantBlanks();
   std::size_t startAt{0};
-  std::size_t keep{followingTokens.SizeInTokens()};
+  std::size_t following{followingTokens.SizeInTokens()};
   bool ok{false};
   if (nextContinuation) {
     ok = true;
   } else {
-    if (keep >= 3 && followingTokens.TokenAt(0) == "!" &&
-        followingTokens.TokenAt(2) == "&") {
+    startAt = 2;
+    if (startAt < following && followingTokens.TokenAt(0) == "!") {
       CharBlock sentinel{followingTokens.TokenAt(1)};
       if (!sentinel.empty() &&
           std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) {
-        startAt = 3;
-        keep -= 3;
         ok = true;
+        while (
+            startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
+          ++startAt;
+        }
+        if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
+          ++startAt;
+        }
       }
     }
   }
   if (ok) {
     tokens.pop_back(); // delete original '&'
-    tokens.Put(followingTokens, startAt, keep);
+    tokens.Put(followingTokens, startAt, following - startAt);
+    tokens.RemoveRedundantBlanks();
   } else {
     nextLine_ = origNextLine;
   }
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 491b1fe0a7517..cf64bdb02a9b7 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -94,6 +94,7 @@ class Prescanner {
     LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
         : kind{k}, payloadOffset{po}, sentinel{s} {}
     LineClassification(LineClassification &&) = default;
+    LineClassification &operator=(LineClassification &&) = default;
     Kind kind;
     std::size_t payloadOffset; // byte offset of content
     const char *sentinel; // if it's a compiler directive
@@ -117,6 +118,7 @@ class Prescanner {
     parenthesisNesting_ = 0;
     continuationLines_ = 0;
     isPossibleMacroCall_ = false;
+    disableSourceContinuation_ = false;
   }
 
   Provenance GetProvenance(const char *sourceChar) const {
@@ -192,6 +194,8 @@ class Prescanner {
   std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
       const char *) const;
   LineClassification ClassifyLine(const char *) const;
+  LineClassification ClassifyLine(
+      TokenSequence &, Provenance newlineProvenance) const;
   void SourceFormChange(std::string &&);
   bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel);
   bool SourceLineContinuation(TokenSequence &);
@@ -211,6 +215,7 @@ class Prescanner {
   int continuationLines_{0};
   bool isPossibleMacroCall_{false};
   bool afterIncludeDirective_{false};
+  bool disableSourceContinuation_{false};
 
   Provenance startProvenance_;
   const char *start_{nullptr}; // beginning of current source file content
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index d0254ecd5aaef..40560bbacb54f 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -347,7 +347,8 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const {
 }
 
 const TokenSequence &TokenSequence::CheckBadFortranCharacters(
-    Messages &messages, const Prescanner &prescanner) const {
+    Messages &messages, const Prescanner &prescanner,
+    bool allowAmpersand) const {
   std::size_t tokens{SizeInTokens()};
   for (std::size_t j{0}; j < tokens; ++j) {
     CharBlock token{TokenAt(j)};
@@ -362,6 +363,8 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
           ++j;
           continue;
         }
+      } else if (ch == '&' && allowAmpersand) {
+        continue;
       }
       if (ch < ' ' || ch >= '\x7f') {
         messages.Say(GetTokenProvenanceRange(j),
diff --git a/flang/test/Preprocessing/directive-contin-with-pp.F90 b/flang/test/Preprocessing/directive-contin-with-pp.F90
index 9a06ae8438210..be8eb4d3c1cee 100644
--- a/flang/test/Preprocessing/directive-contin-with-pp.F90
+++ b/flang/test/Preprocessing/directive-contin-with-pp.F90
@@ -1,12 +1,17 @@
-! RUN: %flang -E %s 2>&1 | FileCheck %s
+! RUN: %flang -fc1 -fdebug-unparse -fopenmp %s 2>&1 | FileCheck %s
 
 #define DIR_START !dir$
 #define DIR_CONT !dir$&
 #define FIRST(x) DIR_START x
 #define NEXT(x) DIR_CONT x
 #define AMPER &
+#define COMMENT !
+#define OMP_START !$omp
+#define OMP_CONT !$omp&
 
-subroutine s(x1, x2, x3, x4, x5, x6, x7)
+module m
+ contains
+  subroutine s(x1, x2, x3, x4, x5, x6, x7)
 
 !dir$ ignore_tkr x1
 
@@ -24,18 +29,48 @@ subroutine s(x1, x2, x3, x4, x5, x6, x7)
 FIRST(ignore_tkr &)
 NEXT(x6)
 
-FIRST(ignore_tkr &)
-NEXT(x7 &)
-NEXT(x8)
+COMMENT blah &
+COMMENT & more
+    stop 1
+
+OMP_START parallel &
+OMP_START do &
+OMP_START reduction(+:x)
+    do j1 = 1, n
+    end do
+
+OMP_START parallel &
+OMP_START & do &
+OMP_START & reduction(+:x)
+    do j2 = 1, n
+    end do
 
+OMP_START parallel &
+OMP_CONT do &
+OMP_CONT reduction(+:x)
+    do j3 = 1, n
+    end do
+  end
 end
 
-!CHECK: subroutine s(x1, x2, x3, x4, x5, x6, x7)
-!CHECK: !dir$ ignore_tkr x1
-!CHECK: !dir$ ignore_tkr x2
-!CHECK: !dir$ ignore_tkr x3
-!CHECK: !dir$ ignore_tkr  x4
-!CHECK: !dir$ ignore_tkr  x5
-!CHECK: !dir$ ignore_tkr  x6
-!CHECK: !dir$ ignore_tkr  x7  x8
-!CHECK: end
+!CHECK: MODULE m
+!CHECK: CONTAINS
+!CHECK:  SUBROUTINE s (x1, x2, x3, x4, x5, x6, x7)
+!CHECK:   !DIR$ IGNORE_TKR x1
+!CHECK:   !DIR$ IGNORE_TKR x2
+!CHECK:   !DIR$ IGNORE_TKR x3
+!CHECK:   !DIR$ IGNORE_TKR x4
+!CHECK:   !DIR$ IGNORE_TKR x5
+!CHECK:   !DIR$ IGNORE_TKR x6
+!CHECK:   STOP 1_4
+!CHECK: !$OMP PARALLEL DO  REDUCTION(+:x)
+!CHECK:   DO j1=1_4,n
+!CHECK:   END DO
+!CHECK: !$OMP PARALLEL DO  REDUCTION(+:x)
+!CHECK:   DO j2=1_4,n
+!CHECK:   END DO
+!CHECK: !$OMP PARALLEL DO  REDUCTION(+:x)
+!CHECK:   DO j3=1_4,n
+!CHECK:   END DO
+!CHECK:  END SUBROUTINE
+!CHECK: END MODULE