[flang-commits] [flang] [flang] Accept directive sentinels in macro-replaced source better (PR #70699)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 30 11:27:31 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-parser
Author: Peter Klausler (klausler)
<details>
<summary>Changes</summary>
At present, the prescanner emits an error if a source line or compiler directive, after macro replacement or not, contains a token with a non-Fortran character. In the particular case of the '!' character, the code that checks for bad characters will accept the '!' if it appears after a ';', since the '!' might begin a compiler directive.
The current implementation fails when a compiler directive appears after some other character that might (by means of further source processing not visible to the prescanner) be replaced with a ';' or newline.
Extend the bad character check for '!' to actually check for a compiler directive sentinel instead.
---
Full diff: https://github.com/llvm/llvm-project/pull/70699.diff
5 Files Affected:
- (modified) flang/lib/Parser/prescan.cpp (+19-4)
- (modified) flang/lib/Parser/prescan.h (+2)
- (modified) flang/lib/Parser/token-sequence.cpp (+12-10)
- (modified) flang/lib/Parser/token-sequence.h (+2-1)
- (added) flang/test/Preprocessing/preprocessed-dirs.F90 (+8)
``````````diff
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 2f25b02bf7a323d..c9da6534ed88ba8 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -205,7 +205,7 @@ void Prescanner::Statement() {
Say(preprocessed->GetProvenanceRange(),
"Preprocessed line resembles a preprocessor directive"_warn_en_US);
preprocessed->ToLowerCase()
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -217,7 +217,7 @@ void Prescanner::Statement() {
preprocessed->ToLowerCase();
SourceFormChange(preprocessed->ToString());
preprocessed->ClipComment(*this, true /* skip first ! */)
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -233,7 +233,7 @@ void Prescanner::Statement() {
}
preprocessed->ToLowerCase()
.ClipComment(*this)
- .CheckBadFortranCharacters(messages_)
+ .CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
break;
@@ -246,7 +246,7 @@ void Prescanner::Statement() {
if (inFixedForm_ && line.kind == LineClassification::Kind::Source) {
EnforceStupidEndStatementRules(tokens);
}
- tokens.CheckBadFortranCharacters(messages_)
+ tokens.CheckBadFortranCharacters(messages_, *this)
.CheckBadParentheses(messages_)
.Emit(cooked_);
}
@@ -1266,6 +1266,21 @@ const char *Prescanner::IsCompilerDirectiveSentinel(
return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
}
+const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
+ const char *p{token.begin()};
+ const char *end{p + token.size()};
+ while (p < end && (*p == ' ' || *p == '\n')) {
+ ++p;
+ }
+ if (p < end && *p == '!') {
+ ++p;
+ }
+ while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
+ --end;
+ }
+ return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
+}
+
constexpr bool IsDirective(const char *match, const char *dir) {
for (; *match; ++match) {
if (*match != ToLowerCaseLetter(*dir++)) {
diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index 021632657a98c13..276fa19a4b1c64c 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -68,7 +68,9 @@ class Prescanner {
bool IsNextLinePreprocessorDirective() const;
TokenSequence TokenizePreprocessorDirective();
Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
+
const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
+ const char *IsCompilerDirectiveSentinel(CharBlock) const;
template <typename... A> Message &Say(A &&...a) {
return messages_.Say(std::forward<A>(a)...);
diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp
index 139d2e1ba811d63..c5a630c471d16ea 100644
--- a/flang/lib/Parser/token-sequence.cpp
+++ b/flang/lib/Parser/token-sequence.cpp
@@ -343,16 +343,23 @@ ProvenanceRange TokenSequence::GetProvenanceRange() const {
}
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
- Messages &messages) const {
+ Messages &messages, const Prescanner &prescanner) const {
std::size_t tokens{SizeInTokens()};
- bool isBangOk{true};
for (std::size_t j{0}; j < tokens; ++j) {
CharBlock token{TokenAt(j)};
char ch{token.FirstNonBlank()};
if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
- if (ch == '!' && isBangOk) {
- // allow in !dir$
- } else if (ch < ' ' || ch >= '\x7f') {
+ if (ch == '!') {
+ if (prescanner.IsCompilerDirectiveSentinel(token)) {
+ continue;
+ } else if (j + 1 < tokens &&
+ prescanner.IsCompilerDirectiveSentinel(
+ TokenAt(j + 1))) { // !dir$, &c.
+ ++j;
+ continue;
+ }
+ }
+ if (ch < ' ' || ch >= '\x7f') {
messages.Say(GetTokenProvenanceRange(j),
"bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
} else {
@@ -360,11 +367,6 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters(
"bad character ('%c') in Fortran token"_err_en_US, ch);
}
}
- if (ch == ';') {
- isBangOk = true;
- } else if (ch != ' ') {
- isBangOk = false;
- }
}
return *this;
}
diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h
index 6b9e1f87ee01609..3df403d41e636f9 100644
--- a/flang/lib/Parser/token-sequence.h
+++ b/flang/lib/Parser/token-sequence.h
@@ -123,7 +123,8 @@ class TokenSequence {
TokenSequence &RemoveBlanks(std::size_t firstChar = 0);
TokenSequence &RemoveRedundantBlanks(std::size_t firstChar = 0);
TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false);
- const TokenSequence &CheckBadFortranCharacters(Messages &) const;
+ const TokenSequence &CheckBadFortranCharacters(
+ Messages &, const Prescanner &) const;
const TokenSequence &CheckBadParentheses(Messages &) const;
void Emit(CookedSource &) const;
llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
diff --git a/flang/test/Preprocessing/preprocessed-dirs.F90 b/flang/test/Preprocessing/preprocessed-dirs.F90
new file mode 100644
index 000000000000000..8ac769fdfb61da9
--- /dev/null
+++ b/flang/test/Preprocessing/preprocessed-dirs.F90
@@ -0,0 +1,8 @@
+! RUN: %flang -fc1 -E -fopenacc %s 2>&1 | FileCheck %s
+!CHECK: subroutine r4(x) Z real :: x Z !$acc routine Z print *, x Z end
+#define SUB(s, t) subroutine s(x) Z\
+ t :: x Z\
+ !$acc routine Z\
+ print *, x Z\
+ end subroutine s
+SUB(r4, real)
``````````
</details>
https://github.com/llvm/llvm-project/pull/70699
More information about the flang-commits
mailing list