[flang-commits] [flang] [flang] Fix handling of identifier in column 1 of free form continuat… (PR #146430)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Mon Jun 30 16:46:22 PDT 2025


https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/146430

>From 15a86d337e2202c2a9f6d398b8943423bfc599e8 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Mon, 30 Jun 2025 15:11:06 -0700
Subject: [PATCH] [flang] Fix handling of identifier in column 1 of free form
 continuation line

An obsolete flag ("insertASpace_") is being used in the prescanner's
implementation of continuation lines to signal some cases in which
a token should be broken because it straddles a line break.
It turns out that it's sufficient to simply note these cases
without ever actually inserting a space, so stop inserting one
(which fixes the motivating bug).  This leaves some variables with
obsolete names, so rename them as well.

This patch handles the third of the three bugs reported in
https://github.com/llvm/llvm-project/issues/146362 .

more
---
 flang/lib/Parser/prescan.cpp         | 36 +++++++++++++++-------------
 flang/lib/Parser/prescan.h           | 17 ++++++++-----
 flang/test/Preprocessing/bug1077.F90 |  7 ++++++
 flang/test/Preprocessing/pp111.F90   |  2 +-
 flang/test/Preprocessing/pp112.F90   |  2 +-
 flang/test/Preprocessing/pp115.F90   |  2 +-
 flang/test/Preprocessing/pp116.F90   |  2 +-
 7 files changed, 41 insertions(+), 27 deletions(-)
 create mode 100644 flang/test/Preprocessing/bug1077.F90

diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index ed5184b0aa13d..ec894ab8513d2 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -593,13 +593,13 @@ bool Prescanner::SkipToNextSignificantCharacter() {
     return false;
   } else {
     auto anyContinuationLine{false};
-    bool mightNeedSpace{false};
+    bool atNewline{false};
     if (MustSkipToEndOfLine()) {
       SkipToEndOfLine();
     } else {
-      mightNeedSpace = *at_ == '\n';
+      atNewline = *at_ == '\n';
     }
-    for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
+    for (; Continuation(atNewline); atNewline = false) {
       anyContinuationLine = true;
       ++continuationLines_;
       if (MustSkipToEndOfLine()) {
@@ -641,7 +641,7 @@ void Prescanner::SkipSpaces() {
   while (IsSpaceOrTab(at_)) {
     NextChar();
   }
-  insertASpace_ = false;
+  brokenToken_ = false;
 }
 
 const char *Prescanner::SkipWhiteSpace(const char *p) {
@@ -745,10 +745,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
       }
     }
   }
-  if (insertASpace_) {
-    tokens.PutNextTokenChar(' ', spaceProvenance_);
-    insertASpace_ = false;
-  }
+  brokenToken_ = false;
   if (*at_ == '\n') {
     return false;
   }
@@ -808,7 +805,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
     bool anyDefined{false};
     bool hadContinuation{false};
     // Subtlety: When an identifier is split across continuation lines,
-    // its parts are kept as distinct pp-tokens if that macro replacement
+    // its parts are kept as distinct pp-tokens if macro replacement
     // should operate on them independently.  This trick accommodates the
     // historic practice of using line continuation for token pasting after
     // replacement.
@@ -822,6 +819,9 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
       ++at_, ++column_;
       hadContinuation = SkipToNextSignificantCharacter();
       if (hadContinuation && IsLegalIdentifierStart(*at_)) {
+        if (brokenToken_) {
+          break;
+        }
         // Continued identifier
         tokens.CloseToken();
         ++parts;
@@ -1348,7 +1348,7 @@ bool Prescanner::SkipCommentLine(bool afterAmpersand) {
   return false;
 }
 
-const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
+const char *Prescanner::FixedFormContinuationLine(bool atNewline) {
   if (IsAtEnd()) {
     return nullptr;
   }
@@ -1381,8 +1381,8 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
       }
       const char *col6{nextLine_ + 5};
       if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
-        if (mightNeedSpace && !IsSpace(nextLine_ + 6)) {
-          insertASpace_ = true;
+        if (atNewline && !IsSpace(nextLine_ + 6)) {
+          brokenToken_ = true;
         }
         return nextLine_ + 6;
       }
@@ -1395,7 +1395,9 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
         nextLine_[4] == ' ' && IsCompilerDirectiveSentinel(&nextLine_[1], 1)) {
       if (const char *col6{nextLine_ + 5};
           *col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
-        insertASpace_ |= mightNeedSpace && !IsSpace(nextLine_ + 6);
+        if (atNewline && !IsSpace(nextLine_ + 6)) {
+          brokenToken_ = true;
+        }
         return nextLine_ + 6;
       } else {
         return nullptr;
@@ -1464,7 +1466,7 @@ const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
     p = SkipWhiteSpace(p);
     if (*p == '&') {
       if (!ampersand) {
-        insertASpace_ = true;
+        brokenToken_ = true;
       }
       return p + 1;
     } else if (ampersand) {
@@ -1494,7 +1496,7 @@ const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
     } else if (p > lineStart && IsSpaceOrTab(p - 1)) {
       --p;
     } else {
-      insertASpace_ = true;
+      brokenToken_ = true;
     }
     return p;
   } else {
@@ -1502,14 +1504,14 @@ const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
   }
 }
 
-bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
+bool Prescanner::FixedFormContinuation(bool atNewline) {
   // N.B. We accept '&' as a continuation indicator in fixed form, too,
   // but not in a character literal.
   if (*at_ == '&' && inCharLiteral_) {
     return false;
   }
   do {
-    if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
+    if (const char *cont{FixedFormContinuationLine(atNewline)}) {
       BeginSourceLine(cont);
       column_ = 7;
       NextLine();
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index ec4c53cf3e0f2..f650d548e6eff 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -203,10 +203,10 @@ class Prescanner {
   std::optional<std::size_t> IsIncludeLine(const char *) const;
   void FortranInclude(const char *quote);
   const char *IsPreprocessorDirectiveLine(const char *) const;
-  const char *FixedFormContinuationLine(bool mightNeedSpace);
+  const char *FixedFormContinuationLine(bool atNewline);
   const char *FreeFormContinuationLine(bool ampersand);
   bool IsImplicitContinuation() const;
-  bool FixedFormContinuation(bool mightNeedSpace);
+  bool FixedFormContinuation(bool atNewline);
   bool FreeFormContinuation();
   bool Continuation(bool mightNeedFixedFormSpace);
   std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
@@ -256,10 +256,15 @@ class Prescanner {
   bool continuationInCharLiteral_{false};
   bool inPreprocessorDirective_{false};
 
-  // In some edge cases of compiler directive continuation lines, it
-  // is necessary to treat the line break as a space character by
-  // setting this flag, which is cleared by EmitChar().
-  bool insertASpace_{false};
+  // True after processing a continuation that can't be allowed
+  // to appear in the middle of an identifier token, but is fixed form,
+  // or is free form and doesn't have a space character handy to use as
+  // a separator when:
+  // a) (standard) doesn't begin with a leading '&' on the continuation
+  //     line, but has a non-blank in column 1, or
+  // b) (extension) does have a leading '&', but didn't have one
+  //    on the continued line.
+  bool brokenToken_{false};
 
   // When a free form continuation marker (&) appears at the end of a line
   // before a INCLUDE or #include, we delete it and omit the newline, so
diff --git a/flang/test/Preprocessing/bug1077.F90 b/flang/test/Preprocessing/bug1077.F90
new file mode 100644
index 0000000000000..dd7391813a357
--- /dev/null
+++ b/flang/test/Preprocessing/bug1077.F90
@@ -0,0 +1,7 @@
+!RUN: %flang -E %s 2>&1 | FileCheck %s
+!CHECK: print *,((1)+(2)),4
+#define foo(x,y) ((x)+(y))
+print *,&
+foo(1,2)&
+,4
+end
diff --git a/flang/test/Preprocessing/pp111.F90 b/flang/test/Preprocessing/pp111.F90
index 4da45ef35f5c0..bbf8709c3ab15 100644
--- a/flang/test/Preprocessing/pp111.F90
+++ b/flang/test/Preprocessing/pp111.F90
@@ -1,5 +1,5 @@
 ! RUN: %flang -E %s 2>&1 | FileCheck %s
-! CHECK: res = IFLM (666)
+! CHECK: res = IFLM(666)
 ! FLM call name split across continuation, no leading &, with & ! comment
       integer function IFLM(x)
         integer :: x
diff --git a/flang/test/Preprocessing/pp112.F90 b/flang/test/Preprocessing/pp112.F90
index 16705527f68c3..a5244410f31af 100644
--- a/flang/test/Preprocessing/pp112.F90
+++ b/flang/test/Preprocessing/pp112.F90
@@ -1,5 +1,5 @@
 ! RUN: %flang -E %s 2>&1 | FileCheck %s
-! CHECK: res = IFLM (666)
+! CHECK: res = IFLM(666)
 ! ditto, but without & ! comment
       integer function IFLM(x)
         integer :: x
diff --git a/flang/test/Preprocessing/pp115.F90 b/flang/test/Preprocessing/pp115.F90
index 4e4c621110ed8..eea42c53b936d 100644
--- a/flang/test/Preprocessing/pp115.F90
+++ b/flang/test/Preprocessing/pp115.F90
@@ -1,5 +1,5 @@
 ! RUN: %flang -E %s 2>&1 | FileCheck %s
-! CHECK: res = IFLM (666)
+! CHECK: res = ((666)+111)
 ! ditto, with & ! comment, no leading &
       integer function IFLM(x)
         integer :: x
diff --git a/flang/test/Preprocessing/pp116.F90 b/flang/test/Preprocessing/pp116.F90
index e35a13cbf6489..39edf95763eab 100644
--- a/flang/test/Preprocessing/pp116.F90
+++ b/flang/test/Preprocessing/pp116.F90
@@ -1,5 +1,5 @@
 ! RUN: %flang -E %s 2>&1 | FileCheck %s
-! CHECK: res = IFLM (666)
+! CHECK: res = ((666)+111)
 ! FLM call split between name and (, no leading &
       integer function IFLM(x)
         integer :: x



More information about the flang-commits mailing list