r358230 - Remove use of lookahead from _Pragma handling and from all other

Thu Apr 11 14:18:22 PDT 2019

Author: rsmith
Date: Thu Apr 11 14:18:22 2019
New Revision: 358230

URL: http://llvm.org/viewvc/llvm-project?rev=358230&view=rev
Log:
Remove use of lookahead from _Pragma handling and from all other
internal lexing steps in the preprocessor.

It is not safe to use the preprocessor's token lookahead except when
operating on the final sequence of tokens that would be produced by
phase 4 of translation. Doing so corrupts the token lookahead cache used
by the parser. (See added testcase for an example.) Lookahead should
instead be viewed as a layer on top of the normal lexer.

Added assertions to catch any further incorrect uses of lookahead within
lexing actions.

Added:
    cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp
Modified:
    cfe/trunk/include/clang/Lex/Preprocessor.h
    cfe/trunk/lib/Lex/PPCaching.cpp
    cfe/trunk/lib/Lex/PPDirectives.cpp
    cfe/trunk/lib/Lex/Pragma.cpp
    cfe/trunk/lib/Lex/Preprocessor.cpp

Modified: cfe/trunk/include/clang/Lex/Preprocessor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=358230&r1=358229&r2=358230&view=diff
==============================================================================

--- cfe/trunk/include/clang/Lex/Preprocessor.h (original)
+++ cfe/trunk/include/clang/Lex/Preprocessor.h Thu Apr 11 14:18:22 2019
@@ -323,6 +323,14 @@ class Preprocessor {
   /// to avoid hitting the same error over and over again.
   bool HasReachedMaxIncludeDepth = false;
 
+  /// The number of currently-active calls to Lex.
+  ///
+  /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
+  /// require asking for multiple additional tokens. This counter makes it
+  /// possible for Lex to detect whether it's producing a token for the end
+  /// of phase 4 of translation or for some other situation.
+  unsigned LexLevel = 0;
+
 public:
   struct PreambleSkipInfo {
     SourceLocation HashTokenLoc;
@@ -1244,24 +1252,6 @@ public:
   /// Disable the last EnableBacktrackAtThisPos call.
   void CommitBacktrackedTokens();
 
-  struct CachedTokensRange {
-    CachedTokensTy::size_type Begin, End;
-  };
-
-private:
-  /// A range of cached tokens that should be erased after lexing
-  /// when backtracking requires the erasure of such cached tokens.
-  Optional<CachedTokensRange> CachedTokenRangeToErase;
-
-public:
-  /// Returns the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  CachedTokensRange LastCachedTokenRange();
-
-  /// Erase the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  void EraseCachedTokens(CachedTokensRange TokenRange);
-
   /// Make Preprocessor re-lex the tokens that were lexed since
   /// EnableBacktrackAtThisPos() was previously called.
   void Backtrack();
@@ -1353,6 +1343,7 @@ public:
   /// tokens after phase 5.  As such, it is equivalent to using
   /// 'Lex', not 'LexUnexpandedToken'.
   const Token &LookAhead(unsigned N) {
+    assert(LexLevel == 0 && "cannot use lookahead while lexing");
     if (CachedLexPos + N < CachedTokens.size())
       return CachedTokens[CachedLexPos+N];
     else
@@ -1379,8 +1370,16 @@ public:
   /// If BackTrack() is called afterwards, the token will remain at the
   /// insertion point.
   void EnterToken(const Token &Tok) {
-    EnterCachingLexMode();
-    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    if (LexLevel) {
+      // It's not correct in general to enter caching lex mode while in the
+      // middle of a nested lexing action.
+      auto TokCopy = llvm::make_unique<Token[]>(1);
+      TokCopy[0] = Tok;
+      EnterTokenStream(std::move(TokCopy), 1, true);
+    } else {
+      EnterCachingLexMode();
+      CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    }
   }
 
   /// We notify the Preprocessor that if it is caching tokens (because
@@ -2062,6 +2061,7 @@ private:
   }
 
   void EnterCachingLexMode();
+  void EnterCachingLexModeUnchecked();
 
   void ExitCachingLexMode() {
     if (InCachingLexMode())

Modified: cfe/trunk/lib/Lex/PPCaching.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPCaching.cpp?rev=358230&r1=358229&r2=358230&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PPCaching.cpp (original)
+++ cfe/trunk/lib/Lex/PPCaching.cpp Thu Apr 11 14:18:22 2019
@@ -23,6 +23,7 @@ using namespace clang;
 // be called multiple times and CommitBacktrackedTokens/Backtrack calls will
 // be combined with the EnableBacktrackAtThisPos calls in reverse order.
 void Preprocessor::EnableBacktrackAtThisPos() {
+  assert(LexLevel == 0 && "cannot use lookahead while lexing");
   BacktrackPositions.push_back(CachedLexPos);
   EnterCachingLexMode();
 }
@@ -34,29 +35,6 @@ void Preprocessor::CommitBacktrackedToke
   BacktrackPositions.pop_back();
 }
 
-Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() {
-  assert(isBacktrackEnabled());
-  auto PrevCachedLexPos = BacktrackPositions.back();
-  return CachedTokensRange{PrevCachedLexPos, CachedLexPos};
-}
-
-void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) {
-  assert(TokenRange.Begin <= TokenRange.End);
-  if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) {
-    // We have backtracked to the start of the token range as we want to consume
-    // them again. Erase the tokens only after consuming then.
-    assert(!CachedTokenRangeToErase);
-    CachedTokenRangeToErase = TokenRange;
-    return;
-  }
-  // The cached tokens were committed, so they should be erased now.
-  assert(TokenRange.End == CachedLexPos);
-  CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin,
-                     CachedTokens.begin() + TokenRange.End);
-  CachedLexPos = TokenRange.Begin;
-  ExitCachingLexMode();
-}
-
 // Make Preprocessor re-lex the tokens that were lexed since
 // EnableBacktrackAtThisPos() was previously called.
 void Preprocessor::Backtrack() {
@@ -71,15 +49,12 @@ void Preprocessor::CachingLex(Token &Res
   if (!InCachingLexMode())
     return;
 
+  // The assert in EnterCachingLexMode should prevent this from happening.
+  assert(LexLevel == 1 &&
+         "should not use token caching within the preprocessor");
+
   if (CachedLexPos < CachedTokens.size()) {
     Result = CachedTokens[CachedLexPos++];
-    // Erase the some of the cached tokens after they are consumed when
-    // asked to do so.
-    if (CachedTokenRangeToErase &&
-        CachedTokenRangeToErase->End == CachedLexPos) {
-      EraseCachedTokens(*CachedTokenRangeToErase);
-      CachedTokenRangeToErase = None;
-    }
     return;
   }
 
@@ -88,14 +63,14 @@ void Preprocessor::CachingLex(Token &Res
 
   if (isBacktrackEnabled()) {
     // Cache the lexed token.
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
     CachedTokens.push_back(Result);
     ++CachedLexPos;
     return;
   }
 
   if (CachedLexPos < CachedTokens.size()) {
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
   } else {
     // All cached tokens were consumed.
     CachedTokens.clear();
@@ -104,11 +79,23 @@ void Preprocessor::CachingLex(Token &Res
 }
 
 void Preprocessor::EnterCachingLexMode() {
+  // The caching layer sits on top of all the other lexers, so it's incorrect
+  // to cache tokens while inside a nested lex action. The cached tokens would
+  // be retained after returning to the enclosing lex action and, at best,
+  // would appear at the wrong position in the token stream.
+  assert(LexLevel == 0 &&
+         "entered caching lex mode while lexing something else");
+
   if (InCachingLexMode()) {
     assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
     return;
   }
 
+  EnterCachingLexModeUnchecked();
+}
+
+void Preprocessor::EnterCachingLexModeUnchecked() {
+  assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
   PushIncludeMacroStack();
   CurLexerKind = CLK_CachingLexer;
 }

Modified: cfe/trunk/lib/Lex/PPDirectives.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPDirectives.cpp?rev=358230&r1=358229&r2=358230&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PPDirectives.cpp (original)
+++ cfe/trunk/lib/Lex/PPDirectives.cpp Thu Apr 11 14:18:22 2019
@@ -829,10 +829,10 @@ void Preprocessor::HandleSkippedDirectiv
       return HandleIncludeDirective(HashLoc, Result);
     }
     if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
-      Token P = LookAhead(0);
-      auto *II = P.getIdentifierInfo();
+      Lex(Result);
+      auto *II = Result.getIdentifierInfo();
       if (II && II->getName() == "hdrstop")
-        return HandlePragmaDirective(HashLoc, PIK_HashPragma);
+        return HandlePragmaHdrstop(Result);
     }
   }
   DiscardUntilEndOfDirective();

Modified: cfe/trunk/lib/Lex/Pragma.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Pragma.cpp?rev=358230&r1=358229&r2=358230&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Pragma.cpp (original)
+++ cfe/trunk/lib/Lex/Pragma.cpp Thu Apr 11 14:18:22 2019
@@ -144,84 +144,72 @@ void Preprocessor::HandlePragmaDirective
     DiscardUntilEndOfDirective();
 }
 
-namespace {
-
-/// Helper class for \see Preprocessor::Handle_Pragma.
-class LexingFor_PragmaRAII {
-  Preprocessor &PP;
-  bool InMacroArgPreExpansion;
-  bool Failed = false;
-  Token &OutTok;
-  Token PragmaTok;
-
-public:
-  LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion,
-                       Token &Tok)
-      : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) {
-    if (InMacroArgPreExpansion) {
-      PragmaTok = OutTok;
-      PP.EnableBacktrackAtThisPos();
-    }
-  }
-
-  ~LexingFor_PragmaRAII() {
-    if (InMacroArgPreExpansion) {
-      // When committing/backtracking the cached pragma tokens in a macro
-      // argument pre-expansion we want to ensure that either the tokens which
-      // have been committed will be removed from the cache or that the tokens
-      // over which we just backtracked won't remain in the cache after they're
-      // consumed and that the caching will stop after consuming them.
-      // Otherwise the caching will interfere with the way macro expansion
-      // works, because we will continue to cache tokens after consuming the
-      // backtracked tokens, which shouldn't happen when we're dealing with
-      // macro argument pre-expansion.
-      auto CachedTokenRange = PP.LastCachedTokenRange();
-      if (Failed) {
-        PP.CommitBacktrackedTokens();
-      } else {
-        PP.Backtrack();
-        OutTok = PragmaTok;
-      }
-      PP.EraseCachedTokens(CachedTokenRange);
-    }
-  }
-
-  void failed() {
-    Failed = true;
-  }
-};
-
-} // namespace
-
 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
 /// return the first token after the directive.  The _Pragma token has just
 /// been read into 'Tok'.
 void Preprocessor::Handle_Pragma(Token &Tok) {
-  // This works differently if we are pre-expanding a macro argument.
-  // In that case we don't actually "activate" the pragma now, we only lex it
-  // until we are sure it is lexically correct and then we backtrack so that
-  // we activate the pragma whenever we encounter the tokens again in the token
-  // stream. This ensures that we will activate it in the correct location
-  // or that we will ignore it if it never enters the token stream, e.g:
+  // C11 6.10.3.4/3:
+  //   all pragma unary operator expressions within [a completely
+  //   macro-replaced preprocessing token sequence] are [...] processed [after
+  //   rescanning is complete]
+  //
+  // This means that we execute _Pragma operators in two cases:
+  //
+  //  1) on token sequences that would otherwise be produced as the output of
+  //     phase 4 of preprocessing, and
+  //  2) on token sequences formed as the macro-replaced token sequence of a
+  //     macro argument
   //
-  //     #define EMPTY(x)
-  //     #define INACTIVE(x) EMPTY(x)
-  //     INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))
+  // Case #2 appears to be a wording bug: only _Pragmas that would survive to
+  // the end of phase 4 should actually be executed. Discussion on the WG14
+  // mailing list suggests that a _Pragma operator is notionally checked early,
+  // but only pragmas that survive to the end of phase 4 should be executed.
+  //
+  // In Case #2, we check the syntax now, but then put the tokens back into the
+  // token stream for later consumption.
+
+  struct TokenCollector {
+    Preprocessor &Self;
+    bool Collect;
+    SmallVector<Token, 3> Tokens;
+    Token &Tok;
+
+    void lex() {
+      if (Collect)
+        Tokens.push_back(Tok);
+      Self.Lex(Tok);
+    }
+
+    void revert() {
+      assert(Collect && "did not collect tokens");
+      assert(!Tokens.empty() && "collected unexpected number of tokens");
+
+      // Push the ( "string" ) tokens into the token stream.
+      auto Toks = llvm::make_unique<Token[]>(Tokens.size());
+      std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get());
+      Toks[Tokens.size() - 1] = Tok;
+      Self.EnterTokenStream(std::move(Toks), Tokens.size(),
+                            /*DisableMacroExpansion*/ true);
+
+      // ... and return the _Pragma token unchanged.
+      Tok = *Tokens.begin();
+    }
+  };
 
-  LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok);
+  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
 
   // Remember the pragma token location.
   SourceLocation PragmaLoc = Tok.getLocation();
 
   // Read the '('.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::l_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Read the '"..."'.
-  Lex(Tok);
+  Toks.lex();
   if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip bad tokens, and the ')', if present.
@@ -233,7 +221,7 @@ void Preprocessor::Handle_Pragma(Token &
       Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   if (Tok.hasUDSuffix()) {
@@ -242,21 +230,24 @@ void Preprocessor::Handle_Pragma(Token &
     Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Remember the string.
   Token StrTok = Tok;
 
   // Read the ')'.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::r_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
-  if (InMacroArgPreExpansion)
+  // If we're expanding a macro argument, put the tokens back.
+  if (InMacroArgPreExpansion) {
+    Toks.revert();
     return;
+  }
 
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);

Modified: cfe/trunk/lib/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Preprocessor.cpp?rev=358230&r1=358229&r2=358230&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Preprocessor.cpp (original)
+++ cfe/trunk/lib/Lex/Preprocessor.cpp Thu Apr 11 14:18:22 2019
@@ -862,6 +862,8 @@ bool Preprocessor::HandleIdentifier(Toke
 }
 
 void Preprocessor::Lex(Token &Result) {
+  ++LexLevel;
+
   // We loop here until a lex function returns a token; this avoids recursion.
   bool ReturnedToken;
   do {
@@ -893,6 +895,7 @@ void Preprocessor::Lex(Token &Result) {
   }
 
   LastTokenWasAt = Result.is(tok::at);
+  --LexLevel;
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if

Added: cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp?rev=358230&view=auto
==============================================================================
--- cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp (added)
+++ cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp Thu Apr 11 14:18:22 2019
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -verify -Wconversion
+
+#define P(X) _Pragma(#X)
+#define V(X) X
+
+#define X \
+  P(clang diagnostic push) \
+  P(clang diagnostic ignored "-Wconversion") \
+  ) = 1.2; \
+  P(clang diagnostic pop)
+
+void f() {
+  int a = 1.2; // expected-warning {{changes value}}
+
+  // Note, we intentionally enter a tentatively-parsed context here to trigger
+  // regular use of lookahead. This would go wrong if _Pragma checking in macro
+  // argument pre-expansion also tries to use token lookahead.
+  int (b
+  V(X)
+
+  int c = 1.2; // expected-warning {{changes value}}
+}