[flang-commits] [flang] 094b380 - [flang] Catch mismatched parentheses in prescanner

Wed Oct 6 14:00:23 PDT 2021

Author: peter klausler
Date: 2021-10-06T14:00:15-07:00
New Revision: 094b380c210ac508a1ad5a7c47a760773b0cc6ea

URL: https://github.com/llvm/llvm-project/commit/094b380c210ac508a1ad5a7c47a760773b0cc6ea
DIFF: https://github.com/llvm/llvm-project/commit/094b380c210ac508a1ad5a7c47a760773b0cc6ea.diff

LOG: [flang] Catch mismatched parentheses in prescanner

Source lines with mismatched parentheses are hard cases for error
recovery in parsing, and the best error message (viz.,
"here's an unmatched parenthesis") can be emitted from the
prescanner.

Differential Revision: https://reviews.llvm.org/D111254#3046173

Added: 
    flang/test/Parser/unmatched-parens.f90

Modified: 
    flang/lib/Parser/prescan.cpp
    flang/lib/Parser/token-sequence.cpp
    flang/lib/Parser/token-sequence.h

Removed: 
    


################################################################################
diff  --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 0531ffd5315b..7d7ee3616fcf 100644

--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -169,7 +169,7 @@ void Prescanner::Statement() {
     preprocessed->CloseToken();
     const char *ppd{preprocessed->ToCharBlock().begin()};
     LineClassification ppl{ClassifyLine(ppd)};
-    preprocessed->RemoveLastToken(); // remove the newline
+    preprocessed->pop_back(); // remove the newline
     switch (ppl.kind) {
     case LineClassification::Kind::Comment:
       break;
@@ -182,8 +182,10 @@ void Prescanner::Statement() {
     case LineClassification::Kind::PreprocessorDirective:
       Say(preprocessed->GetProvenanceRange(),
           "Preprocessed line resembles a preprocessor directive"_en_US);
-      preprocessed->ToLowerCase().CheckBadFortranCharacters(messages_).Emit(
-          cooked_);
+      preprocessed->ToLowerCase()
+          .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
+          .Emit(cooked_);
       break;
     case LineClassification::Kind::CompilerDirective:
       if (preprocessed->HasRedundantBlanks()) {
@@ -194,6 +196,7 @@ void Prescanner::Statement() {
       SourceFormChange(preprocessed->ToString());
       preprocessed->ClipComment(true /* skip first ! */)
           .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
     case LineClassification::Kind::Source:
@@ -209,6 +212,7 @@ void Prescanner::Statement() {
       preprocessed->ToLowerCase()
           .ClipComment()
           .CheckBadFortranCharacters(messages_)
+          .CheckBadParentheses(messages_)
           .Emit(cooked_);
       break;
     }
@@ -220,7 +224,9 @@ void Prescanner::Statement() {
     if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
       EnforceStupidEndStatementRules(tokens);
     }
-    tokens.CheckBadFortranCharacters(messages_).Emit(cooked_);
+    tokens.CheckBadFortranCharacters(messages_)
+        .CheckBadParentheses(messages_)
+        .Emit(cooked_);
   }
   if (omitNewline_) {
     omitNewline_ = false;
@@ -255,6 +261,7 @@ void Prescanner::NextLine() {
 void Prescanner::LabelField(TokenSequence &token) {
   const char *bad{nullptr};
   int outCol{1};
+  const char *start{at_};
   for (; *at_ != '\n' && column_ <= 6; ++at_) {
     if (*at_ == '\t') {
       ++at_;
@@ -271,17 +278,19 @@ void Prescanner::LabelField(TokenSequence &token) {
     }
     ++column_;
   }
+  if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
+    Say(GetProvenance(bad),
+        "Character in fixed-form label field must be a digit"_en_US);
+    token.clear();
+    at_ = start;
+    return;
+  }
   if (outCol == 1) { // empty label field
     // Emit a space so that, if the line is rescanned after preprocessing,
     // a leading 'C' or 'D' won't be left-justified and then accidentally
     // misinterpreted as a comment card.
     EmitChar(token, ' ');
     ++outCol;
-  } else {
-    if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
-      Say(GetProvenance(bad),
-          "Character in fixed-form label field must be a digit"_en_US);
-    }
   }
   token.CloseToken();
   SkipToNextSignificantCharacter();

diff  --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 3ed9d05e8504..55217a95dfb9 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -27,6 +27,8 @@ void TokenSequence::clear() {
 }
 
 void TokenSequence::pop_back() {
+  CHECK(!start_.empty());
+  CHECK(nextStart_ > start_.back());
   std::size_t bytes{nextStart_ - start_.back()};
   nextStart_ = start_.back();
   start_.pop_back();
@@ -82,16 +84,6 @@ bool TokenSequence::IsAnythingLeft(std::size_t at) const {
   return false;
 }
 
-void TokenSequence::RemoveLastToken() {
-  CHECK(!start_.empty());
-  CHECK(nextStart_ > start_.back());
-  std::size_t bytes{nextStart_ - start_.back()};
-  nextStart_ = start_.back();
-  start_.pop_back();
-  char_.erase(char_.begin() + nextStart_, char_.end());
-  provenances_.RemoveLastBytes(bytes);
-}
-
 void TokenSequence::Put(const TokenSequence &that) {
   if (nextStart_ < char_.size()) {
     start_.push_back(nextStart_);
@@ -338,4 +330,41 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
   }
   return *this;
 }
+
+const TokenSequence &TokenSequence::CheckBadParentheses(
+    Messages &messages) const {
+  // First, a quick pass with no allocation for the common case
+  int nesting{0};
+  std::size_t tokens{SizeInTokens()};
+  for (std::size_t j{0}; j < tokens; ++j) {
+    CharBlock token{TokenAt(j)};
+    char ch{token.FirstNonBlank()};
+    if (ch == '(') {
+      ++nesting;
+    } else if (ch == ')') {
+      --nesting;
+    }
+  }
+  if (nesting != 0) {
+    // There's an error; diagnose it
+    std::vector<std::size_t> stack;
+    for (std::size_t j{0}; j < tokens; ++j) {
+      CharBlock token{TokenAt(j)};
+      char ch{token.FirstNonBlank()};
+      if (ch == '(') {
+        stack.push_back(j);
+      } else if (ch == ')') {
+        if (stack.empty()) {
+          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
+          return *this;
+        }
+        stack.pop_back();
+      }
+    }
+    CHECK(!stack.empty());
+    messages.Say(
+        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
+  }
+  return *this;
+}
 } // namespace Fortran::parser

diff  --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index 6da6229614fe..c039126bee0f 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -92,8 +92,6 @@ class TokenSequence {
     start_.pop_back();
   }
 
-  void RemoveLastToken();
-
   void Put(const TokenSequence &);
   void Put(const TokenSequence &, ProvenanceRange);
   void Put(const TokenSequence &, std::size_t at, std::size_t tokens = 1);
@@ -119,6 +117,7 @@ class TokenSequence {
   TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
   TokenSequence &ClipComment(bool skipFirst = false);
   const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+  const TokenSequence &CheckBadParentheses(Messages &) const;
   void Emit(CookedSource &) const;
   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
 

diff  --git a/flang/test/Parser/unmatched-parens.f90 b/flang/test/Parser/unmatched-parens.f90
new file mode 100644
index 000000000000..c0a3e843f4f7
--- /dev/null
+++ b/flang/test/Parser/unmatched-parens.f90
@@ -0,0 +1,8 @@
+! RUN: not %flang_fc1 -E %s 2>&1 | FileCheck %s
+do i = 1,10
+  ! CHECK: Unmatched '('
+  if (i != 0) then
+    exit
+  endif
+enddo
+end