[flang-commits] [flang] 01def7f - [flang] Rework preprocessing of stringification

peter klausler via flang-commits flang-commits at lists.llvm.org
Fri Sep 18 10:46:18 PDT 2020


Author: peter klausler
Date: 2020-09-18T10:45:57-07:00
New Revision: 01def7f7c3f1f762ce57a89aceb85214669911c3

URL: https://github.com/llvm/llvm-project/commit/01def7f7c3f1f762ce57a89aceb85214669911c3
DIFF: https://github.com/llvm/llvm-project/commit/01def7f7c3f1f762ce57a89aceb85214669911c3.diff

LOG: [flang] Rework preprocessing of stringification

Hew more closely to the C17 standard; perform macro replacement
of arguments to function-like macros unless they're being stringified
or pasted.  Test with a model "assert" macro idiom that exposed
the problem.

Differential Revision: https://reviews.llvm.org/D87650

Added: 
    flang/test/Preprocessing/assert.F90

Modified: 
    flang/lib/Parser/parsing.cpp
    flang/lib/Parser/preprocessor.cpp
    flang/lib/Parser/preprocessor.h
    flang/lib/Parser/prescan.cpp
    flang/lib/Parser/prescan.h

Removed: 
    


################################################################################
diff  --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index 819f3cf99867..7f3a4a623f46 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -65,7 +65,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
   }
   currentCooked_ = &allCooked_.NewCookedSource();
   Prescanner prescanner{
-      messages_, *currentCooked_, allSources, preprocessor, options.features};
+      messages_, *currentCooked_, preprocessor, options.features};
   prescanner.set_fixedForm(options.isFixedForm)
       .set_fixedFormColumnLimit(options.fixedFormColumns)
       .AddCompilerDirectiveSentinel("dir$");

diff  --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index 823adda8e95a..c5422cc0070f 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -73,15 +73,6 @@ TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
   return result;
 }
 
-static std::size_t AfterLastNonBlank(const TokenSequence &tokens) {
-  for (std::size_t j{tokens.SizeInTokens()}; j > 0; --j) {
-    if (!tokens.TokenAt(j - 1).IsBlank()) {
-      return j;
-    }
-  }
-  return 0;
-}
-
 static TokenSequence Stringify(
     const TokenSequence &tokens, AllSources &allSources) {
   TokenSequence result;
@@ -104,15 +95,56 @@ static TokenSequence Stringify(
   return result;
 }
 
-TokenSequence Definition::Apply(
-    const std::vector<TokenSequence> &args, AllSources &allSources) {
+constexpr bool IsTokenPasting(CharBlock opr) { // true iff opr is exactly the two characters "##"
+  return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
+}
+
+static bool AnyTokenPasting(const TokenSequence &text) { // does any token of text equal "##"?
+  std::size_t tokens{text.SizeInTokens()};
+  for (std::size_t j{0}; j < tokens; ++j) {
+    if (IsTokenPasting(text.TokenAt(j))) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static TokenSequence TokenPasting(TokenSequence &&text) { // applies every "##" operator found in text and returns the result
+  if (!AnyTokenPasting(text)) {
+    return std::move(text); // no "##" present: hand the input back without copying tokens
+  }
   TokenSequence result;
+  std::size_t tokens{text.SizeInTokens()};
   bool pasting{false};
+  for (std::size_t j{0}; j < tokens; ++j) {
+    if (IsTokenPasting(text.TokenAt(j))) {
+      if (!pasting) { // a "##" seen while already pasting is dropped with no further effect
+        while (!result.empty() &&
+            result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
+          result.pop_back(); // discard blanks that precede the "##"
+        }
+        if (!result.empty()) { // a "##" with nothing non-blank before it is dropped
+          result.ReopenLastToken(); // presumably lets the next Put() extend this token -- confirm in TokenSequence
+          pasting = true;
+        }
+      }
+    } else if (pasting && text.TokenAt(j).IsBlank()) { // skip blanks that follow the "##"
+    } else {
+      result.Put(text, j, 1);
+      pasting = false;
+    }
+  }
+  return result;
+}
+
+TokenSequence Definition::Apply( // expands replacement_ using the actual arguments in args
+    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
+  TokenSequence result;
   bool skipping{false};
   int parenthesesNesting{0};
   std::size_t tokens{replacement_.SizeInTokens()};
   for (std::size_t j{0}; j < tokens; ++j) {
-    const CharBlock &token{replacement_.TokenAt(j)};
+    CharBlock token{replacement_.TokenAt(j)};
     std::size_t bytes{token.size()};
     if (skipping) {
       if (bytes == 1) {
@@ -124,44 +156,49 @@ TokenSequence Definition::Apply(
       }
       continue;
     }
-    if (bytes == 2 && token[0] == '~') {
+    if (bytes == 2 && token[0] == '~') { // argument substitution: "~A", "~B", ... select args[0], args[1], ...
       std::size_t index = token[1] - 'A';
       if (index >= args.size()) {
         continue;
       }
-      std::size_t afterLastNonBlank{AfterLastNonBlank(result)};
-      if (afterLastNonBlank > 0 &&
-          result.TokenAt(afterLastNonBlank - 1).ToString() == "#") {
-        // stringifying
-        while (result.SizeInTokens() >= afterLastNonBlank) {
+      std::size_t prev{j}; // after the loop below, prev - 1 (when prev > 0) is the nearest preceding non-blank token of replacement_
+      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
+        --prev;
+      }
+      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
+          replacement_.TokenAt(prev - 1)[0] ==
+              '#') { // stringify argument without macro replacement
+        std::size_t resultSize{result.SizeInTokens()}; // NOTE(review): never updated by the loop below, yet reused after pop_back() -- confirm
+        while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) { // NOTE(review): tests empty(), whereas prev was found by skipping IsBlank() tokens -- confirm intended
           result.pop_back();
         }
-        result.Put(Stringify(args[index], allSources));
+        CHECK(resultSize > 0 &&
+            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
+        result.pop_back(); // remove the "#" operator token that the CHECK above just matched
+        result.Put(Stringify(args[index], prescanner.allSources()));
       } else {
-        std::size_t argTokens{args[index].SizeInTokens()};
-        for (std::size_t k{0}; k < argTokens; ++k) {
-          if (!pasting || !args[index].TokenAt(k).IsBlank()) {
-            result.Put(args[index], k);
-            pasting = false;
+        const TokenSequence *arg{&args[index]}; // defaults to the unexpanded actual argument
+        std::optional<TokenSequence> replaced;
+        // Don't replace macros in the actual argument if it is preceded or
+        // followed by the token-pasting operator ## in the replacement text.
+        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
+          auto next{replacement_.SkipBlanks(j + 1)};
+          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
+            // Apply macro replacement to the actual argument
+            replaced =
+                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
+            if (replaced) { // an empty optional leaves arg pointing at the original argument
+              arg = &*replaced;
+            }
           }
         }
+        result.Put(DEREF(arg));
       }
-    } else if (bytes == 2 && token[0] == '#' && token[1] == '#') {
-      // Token pasting operator in body (not expanded argument); discard any
-      // immediately preceding white space, then reopen the last token.
-      while (!result.empty() &&
-          result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
-        result.pop_back();
-      }
-      if (!result.empty()) {
-        result.ReopenLastToken();
-        pasting = true;
-      }
-    } else if (pasting && token.IsBlank()) {
-      // Delete whitespace immediately following ## in the body.
     } else if (bytes == 11 && isVariadic_ &&
         token.ToString() == "__VA_ARGS__") {
-      Provenance commaProvenance{allSources.CompilerInsertionProvenance(',')};
+      Provenance commaProvenance{
+          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
+              ',')};
       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
         if (k > argumentCount_) {
           result.Put(","s, commaProvenance);
@@ -186,7 +223,7 @@ TokenSequence Definition::Apply(
       result.Put(replacement_, j);
     }
   }
-  return result;
+  return TokenPasting(std::move(result)); // "##" is applied only after all substitutions have been emitted
 }
 
 static std::string FormatTime(const std::time_t &now, const char *format) {
@@ -218,7 +255,7 @@ void Preprocessor::Define(std::string macro, std::string value) {
 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
 
 std::optional<TokenSequence> Preprocessor::MacroReplacement(
-    const TokenSequence &input, const Prescanner &prescanner) {
+    const TokenSequence &input, Prescanner &prescanner) {
   // Do quick scan for any use of a defined name.
   std::size_t tokens{input.SizeInTokens()};
   std::size_t j;
@@ -271,7 +308,8 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
         }
       }
       def.set_isDisabled(true);
-      TokenSequence replaced{ReplaceMacros(def.replacement(), prescanner)};
+      TokenSequence replaced{
+          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
       def.set_isDisabled(false);
       if (!replaced.empty()) {
         ProvenanceRange from{def.replacement().GetProvenanceRange()};
@@ -333,7 +371,7 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
     }
     def.set_isDisabled(true);
     TokenSequence replaced{
-        ReplaceMacros(def.Apply(args, allSources_), prescanner)};
+        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
     def.set_isDisabled(false);
     if (!replaced.empty()) {
       ProvenanceRange from{def.replacement().GetProvenanceRange()};
@@ -348,7 +386,7 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
 }
 
 TokenSequence Preprocessor::ReplaceMacros(
-    const TokenSequence &tokens, const Prescanner &prescanner) {
+    const TokenSequence &tokens, Prescanner &prescanner) {
   if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
     return std::move(*repl);
   }

diff  --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index b4374a9acf5c..436adf5cdde0 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -28,6 +28,7 @@
 namespace Fortran::parser {
 
 class Prescanner;
+class Preprocessor;
 
 // Defines a macro
 class Definition {
@@ -46,7 +47,7 @@ class Definition {
 
   bool set_isDisabled(bool disable);
 
-  TokenSequence Apply(const std::vector<TokenSequence> &args, AllSources &);
+  TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &);
 
 private:
   static TokenSequence Tokenize(const std::vector<std::string> &argNames,
@@ -65,12 +66,15 @@ class Preprocessor {
 public:
   explicit Preprocessor(AllSources &);
 
+  const AllSources &allSources() const { return allSources_; }
+  AllSources &allSources() { return allSources_; }
+
   void Define(std::string macro, std::string value);
   void Undefine(std::string macro);
   bool IsNameDefined(const CharBlock &);
 
   std::optional<TokenSequence> MacroReplacement(
-      const TokenSequence &, const Prescanner &);
+      const TokenSequence &, Prescanner &);
 
   // Implements a preprocessor directive.
   void Directive(const TokenSequence &, Prescanner *);
@@ -80,7 +84,7 @@ class Preprocessor {
   enum class CanDeadElseAppear { No, Yes };
 
   CharBlock SaveTokenAsName(const CharBlock &);
-  TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &);
+  TokenSequence ReplaceMacros(const TokenSequence &, Prescanner &);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner *, ProvenanceRange);
   bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,

diff  --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index 3eb909fc1ae8..dc6fbe529769 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -26,15 +26,14 @@ using common::LanguageFeature;
 static constexpr int maxPrescannerNesting{100};
 
 Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
-    AllSources &allSources, Preprocessor &preprocessor,
-    common::LanguageFeatureControl lfc)
-    : messages_{messages}, cooked_{cooked}, allSources_{allSources},
-      preprocessor_{preprocessor}, features_{lfc},
+    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
+    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
+      allSources_{preprocessor_.allSources()}, features_{lfc},
       encoding_{allSources_.encoding()} {}
 
 Prescanner::Prescanner(const Prescanner &that)
     : messages_{that.messages_}, cooked_{that.cooked_},
-      allSources_{that.allSources_}, preprocessor_{that.preprocessor_},
+      preprocessor_{that.preprocessor_}, allSources_{that.allSources_},
       features_{that.features_}, inFixedForm_{that.inFixedForm_},
       fixedFormColumnLimit_{that.fixedFormColumnLimit_},
       encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ +
@@ -489,7 +488,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
       // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
       // we don't misrecognize I9HOLLERITH as an identifier in the next case.
       EmitCharAndAdvance(tokens, *at_);
-    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) {
+    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
       EmitCharAndAdvance(tokens, *at_);
       QuotedCharacterLiteral(tokens, start);
     }
@@ -507,7 +506,8 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
   } else if (IsLegalInIdentifier(*at_)) {
     do {
     } while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_)));
-    if (*at_ == '\'' || *at_ == '"') {
+    if ((*at_ == '\'' || *at_ == '"') &&
+        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
       QuotedCharacterLiteral(tokens, start);
     }
     preventHollerith_ = false;

diff  --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h
index ab56ed455040..2f1b83e35e3e 100644
--- a/flang/lib/Parser/prescan.h
+++ b/flang/lib/Parser/prescan.h
@@ -33,11 +33,16 @@ class Preprocessor;
 
 class Prescanner {
 public:
-  Prescanner(Messages &, CookedSource &, AllSources &, Preprocessor &,
+  Prescanner(Messages &, CookedSource &, Preprocessor &,
       common::LanguageFeatureControl);
   Prescanner(const Prescanner &);
 
-  Messages &messages() const { return messages_; }
+  const AllSources &allSources() const { return allSources_; }
+  AllSources &allSources() { return allSources_; }
+  const Messages &messages() const { return messages_; }
+  Messages &messages() { return messages_; }
+  const Preprocessor &preprocessor() const { return preprocessor_; }
+  Preprocessor &preprocessor() { return preprocessor_; }
 
   Prescanner &set_fixedForm(bool yes) {
     inFixedForm_ = yes;
@@ -181,8 +186,8 @@ class Prescanner {
 
   Messages &messages_;
   CookedSource &cooked_;
-  AllSources &allSources_;
   Preprocessor &preprocessor_;
+  AllSources &allSources_;
   common::LanguageFeatureControl features_;
   bool inFixedForm_{false};
   int fixedFormColumnLimit_{72};

diff  --git a/flang/test/Preprocessing/assert.F90 b/flang/test/Preprocessing/assert.F90
new file mode 100644
index 000000000000..64d49e6f1cbd
--- /dev/null
+++ b/flang/test/Preprocessing/assert.F90
@@ -0,0 +1,8 @@
+!RUN: %f18 -E %s 2>&1 | FileCheck %s
+!CHECK: if(.not.(.true.)) error stop "assert(" // ".TRUE." // ") failed " // "at ""
+!CHECK-SAME: assert.F90"": " // "7"
+#define STR(x) #x
+#define POSITION(f,ln) "at "f": " // STR(ln)
+#define assert(x) if(.not.(x)) error stop "assert(" // #x // ") failed " // POSITION(__FILE__,__LINE__)
+assert(.TRUE.)
+end


        


More information about the flang-commits mailing list