[clang] e8976e9 - [clang][Preprocessor] Add peekNextPPToken, makes look ahead next token without side-effects (#143898)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 24 03:55:26 PDT 2025


Author: yronglin
Date: 2025-06-24T18:55:21+08:00
New Revision: e8976e92f655f8c358081ffae01e10ea159cd37d

URL: https://github.com/llvm/llvm-project/commit/e8976e92f655f8c358081ffae01e10ea159cd37d
DIFF: https://github.com/llvm/llvm-project/commit/e8976e92f655f8c358081ffae01e10ea159cd37d.diff

LOG: [clang][Preprocessor] Add peekNextPPToken, makes look ahead next token without side-effects (#143898)

This PR introduce a new function `peekNextPPToken`. It's an extension of
`isNextPPTokenLParen` and can makes look ahead one token in preprocessor
without side-effects.

It's also the 1st part of
https://github.com/llvm/llvm-project/pull/107168 and it was used to look
ahead next token then determine whether current lexing pp directive is
one of pp-import or pp-module directive.

At the start of phase 4 an import or module token is treated as starting
a directive and are converted to their respective keywords iff:

 - After skipping horizontal whitespace are
    - at the start of a logical line, or
    - preceded by an export at the start of the logical line.
- Are followed by an identifier pp token (before macro expansion), or
    - <, ", or : (but not ::) pp tokens for import, or
    - ; for module
Otherwise the token is treated as an identifier.

---------

Signed-off-by: yronglin <yronglin777 at gmail.com>

Added: 
    

Modified: 
    clang/include/clang/Lex/Lexer.h
    clang/include/clang/Lex/Preprocessor.h
    clang/include/clang/Lex/TokenLexer.h
    clang/lib/Lex/Lexer.cpp
    clang/lib/Lex/PPDirectives.cpp
    clang/lib/Lex/PPMacroExpansion.cpp
    clang/lib/Lex/Preprocessor.cpp
    clang/lib/Lex/TokenLexer.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index ca812ba1583fb..7a178b5b5c85b 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
   //===--------------------------------------------------------------------===//
   // Context that changes as the file is lexed.
   // NOTE: any state that mutates when in raw mode must have save/restore code
-  // in Lexer::isNextPPTokenLParen.
+  // in Lexer::peekNextPPToken.
 
   // BufferPtr - Current pointer into the buffer.  This is the next character
   // to be lexed.
@@ -645,10 +645,10 @@ class Lexer : public PreprocessorLexer {
     BufferPtr = TokEnd;
   }
 
-  /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
-  /// tok::l_paren token, 0 if it is something else and 2 if there are no more
-  /// tokens in the buffer controlled by this lexer.
-  unsigned isNextPPTokenLParen();
+  /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+  /// buffer controlled by this lexer, otherwise return the next unexpanded
+  /// token.
+  std::optional<Token> peekNextPPToken();
 
   //===--------------------------------------------------------------------===//
   // Lexer character reading interfaces.

diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 47830b428c8ad..0ec1cb4d0c5d8 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2302,10 +2302,41 @@ class Preprocessor {
     }
   }
 
-  /// Determine whether the next preprocessor token to be
-  /// lexed is a '('.  If so, consume the token and return true, if not, this
+  /// Check whether the next pp-token is one of the specificed token kind. this
   /// method should have no observable side-effect on the lexed tokens.
-  bool isNextPPTokenLParen();
+  template <tok::TokenKind K, tok::TokenKind... Ks> bool isNextPPTokenOneOf() {
+    // Do some quick tests for rejection cases.
+    std::optional<Token> Val;
+    if (CurLexer)
+      Val = CurLexer->peekNextPPToken();
+    else
+      Val = CurTokenLexer->peekNextPPToken();
+
+    if (!Val) {
+      // We have run off the end.  If it's a source file we don't
+      // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
+      // macro stack.
+      if (CurPPLexer)
+        return false;
+      for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
+        if (Entry.TheLexer)
+          Val = Entry.TheLexer->peekNextPPToken();
+        else
+          Val = Entry.TheTokenLexer->peekNextPPToken();
+
+        if (Val)
+          break;
+
+        // Ran off the end of a source file?
+        if (Entry.ThePPLexer)
+          return false;
+      }
+    }
+
+    // Okay, we found the token and return.  Otherwise we found the end of the
+    // translation unit.
+    return Val->is(K) || (... || Val->is(Ks));
+  }
 
 private:
   /// Identifiers used for SEH handling in Borland. These are only

diff  --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 4d229ae610674..777b4e6266c71 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -139,10 +139,9 @@ class TokenLexer {
   void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
             bool OwnsTokens, bool IsReinject);
 
-  /// If the next token lexed will pop this macro off the
-  /// expansion stack, return 2.  If the next unexpanded token is a '(', return
-  /// 1, otherwise return 0.
-  unsigned isNextTokenLParen() const;
+  /// If the next token lexed will pop this macro off the expansion stack,
+  /// return std::nullopt, otherwise return the next unexpanded token.
+  std::optional<Token> peekNextPPToken() const;
 
   /// Lex and return a token from this macro stream.
   bool Lex(Token &Tok);

diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index b61ea3b1614c7..f4d16ecce393c 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3202,18 +3202,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   return PP->HandleEndOfFile(Result, isPragmaLexer());
 }
 
-/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
-/// the specified lexer will return a tok::l_paren token, 0 if it is something
-/// else and 2 if there are no more tokens in the buffer controlled by the
-/// lexer.
-unsigned Lexer::isNextPPTokenLParen() {
+/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+/// buffer controlled by this lexer, otherwise return the next unexpanded
+/// token.
+std::optional<Token> Lexer::peekNextPPToken() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
   if (isDependencyDirectivesLexer()) {
     if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
-      return 2;
-    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
-        tok::l_paren);
+      return std::nullopt;
+    Token Result;
+    (void)convertDependencyDirectiveToken(
+        DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
+    return Result;
   }
 
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
@@ -3242,8 +3243,8 @@ unsigned Lexer::isNextPPTokenLParen() {
   LexingRawMode = false;
 
   if (Tok.is(tok::eof))
-    return 2;
-  return Tok.is(tok::l_paren);
+    return std::nullopt;
+  return Tok;
 }
 
 /// Find the end of a version control conflict marker.

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 70934b9b1dec3..c8974e5a3528c 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -183,9 +183,9 @@ static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) {
     AttributeCommonInfo::AttrArgsInfo AttrArgsInfo =
         AttributeCommonInfo::getCXX11AttrArgsInfo(II);
     if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required)
-      return PP.isNextPPTokenLParen();
+      return PP.isNextPPTokenOneOf<tok::l_paren>();
 
-    return !PP.isNextPPTokenLParen() ||
+    return !PP.isNextPPTokenOneOf<tok::l_paren>() ||
            AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional;
   }
   return false;

diff  --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 97bdeb873d699..709cf3bb87c8e 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -418,44 +418,6 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
   return !llvm::is_contained(MI->params(), II);
 }
 
-/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
-/// lexed is a '('.  If so, consume the token and return true, if not, this
-/// method should have no observable side-effect on the lexed tokens.
-bool Preprocessor::isNextPPTokenLParen() {
-  // Do some quick tests for rejection cases.
-  unsigned Val;
-  if (CurLexer)
-    Val = CurLexer->isNextPPTokenLParen();
-  else
-    Val = CurTokenLexer->isNextTokenLParen();
-
-  if (Val == 2) {
-    // We have run off the end.  If it's a source file we don't
-    // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
-    // macro stack.
-    if (CurPPLexer)
-      return false;
-    for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
-      if (Entry.TheLexer)
-        Val = Entry.TheLexer->isNextPPTokenLParen();
-      else
-        Val = Entry.TheTokenLexer->isNextTokenLParen();
-
-      if (Val != 2)
-        break;
-
-      // Ran off the end of a source file?
-      if (Entry.ThePPLexer)
-        return false;
-    }
-  }
-
-  // Okay, if we know that the token is a '(', lex it and return.  Otherwise we
-  // have found something that isn't a '(' or we found the end of the
-  // translation unit.  In either case, return false.
-  return Val == 1;
-}
-
 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
 /// expanded as a macro, handle it and return the next token as 'Identifier'.
 bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,

diff  --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 18b2f5f02d6c7..7fecbe9eee53c 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -813,14 +813,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
         // C99 6.10.3p10: If the preprocessing token immediately after the
         // macro name isn't a '(', this macro should not be expanded.
-        if (!MI->isFunctionLike() || isNextPPTokenLParen())
+        if (!MI->isFunctionLike() || isNextPPTokenOneOf<tok::l_paren>())
           return HandleMacroExpandedIdentifier(Identifier, MD);
       } else {
         // C99 6.10.3.4p2 says that a disabled macro may never again be
         // expanded, even if it's in a context where it could be expanded in the
         // future.
         Identifier.setFlag(Token::DisableExpand);
-        if (MI->isObjectLike() || isNextPPTokenLParen())
+        if (MI->isObjectLike() || isNextPPTokenOneOf<tok::l_paren>())
           Diag(Identifier, diag::pp_disabled_macro_expansion);
       }
     }

diff  --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 6e93416e01c0c..fbb8c4262d6da 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -921,13 +921,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
 }
 
 /// isNextTokenLParen - If the next token lexed will pop this macro off the
-/// expansion stack, return 2.  If the next unexpanded token is a '(', return
-/// 1, otherwise return 0.
-unsigned TokenLexer::isNextTokenLParen() const {
+/// expansion stack, return std::nullopt, otherwise return the next unexpanded
+/// token.
+std::optional<Token> TokenLexer::peekNextPPToken() const {
   // Out of tokens?
   if (isAtEnd())
-    return 2;
-  return Tokens[CurTokenIdx].is(tok::l_paren);
+    return std::nullopt;
+  return Tokens[CurTokenIdx];
 }
 
 /// isParsingPreprocessorDirective - Return true if we are in the middle of a


        


More information about the cfe-commits mailing list