[clang] 247fa04 - [clang] Add a new annotation token: annot_repl_input_end

Jun Zhang via cfe-commits cfe-commits at lists.llvm.org
Tue May 16 05:11:18 PDT 2023


Author: Jun Zhang
Date: 2023-05-16T20:10:43+08:00
New Revision: 247fa04116a6cabf8378c6c72d90b2f705e969de

URL: https://github.com/llvm/llvm-project/commit/247fa04116a6cabf8378c6c72d90b2f705e969de
DIFF: https://github.com/llvm/llvm-project/commit/247fa04116a6cabf8378c6c72d90b2f705e969de.diff

LOG: [clang] Add a new annotation token: annot_repl_input_end

This patch is the first part of the below RFC:
https://discourse.llvm.org/t/rfc-handle-execution-results-in-clang-repl/68493

It adds an annotation token which will replace the original EOF token
when we are in the incremental C++ mode. In addition, when we're
parsing an ExprStmt and there's a missing semicolon after the
expression, we set a marker in the annotation token and continue
parsing.

Eventually, we propogate this info in ParseTopLevelStmtDecl and are able
to mark this Decl as something we want to do value printing. Below is a
example:

clang-repl> int x = 42;
clang-repl> x
// `x` is a TopLevelStmtDecl and without a semicolon, we should set
// it's IsSemiMissing bit so we can do something interesting in
// ASTConsumer::HandleTopLevelDecl.

The idea about annotation toke is proposed by Richard Smith, thanks!

Signed-off-by: Jun Zhang <jun at junz.org>

Differential Revision: https://reviews.llvm.org/D148997

Added: 
    

Modified: 
    clang/include/clang/AST/Decl.h
    clang/include/clang/Basic/TokenKinds.def
    clang/include/clang/Parse/Parser.h
    clang/lib/Frontend/PrintPreprocessedOutput.cpp
    clang/lib/Interpreter/IncrementalParser.cpp
    clang/lib/Lex/PPLexerChange.cpp
    clang/lib/Parse/ParseCXXInlineMethods.cpp
    clang/lib/Parse/ParseDecl.cpp
    clang/lib/Parse/ParseStmt.cpp
    clang/lib/Parse/Parser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 650baf56414c7..37512ccb81e59 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -4324,6 +4324,7 @@ class TopLevelStmtDecl : public Decl {
   friend class ASTDeclWriter;
 
   Stmt *Statement = nullptr;
+  bool IsSemiMissing = false;
 
   TopLevelStmtDecl(DeclContext *DC, SourceLocation L, Stmt *S)
       : Decl(TopLevelStmt, DC, L), Statement(S) {}
@@ -4337,6 +4338,12 @@ class TopLevelStmtDecl : public Decl {
   SourceRange getSourceRange() const override LLVM_READONLY;
   Stmt *getStmt() { return Statement; }
   const Stmt *getStmt() const { return Statement; }
+  void setStmt(Stmt *S) {
+    assert(IsSemiMissing && "Operation supported for printing values only!");
+    Statement = S;
+  }
+  bool isSemiMissing() const { return IsSemiMissing; }
+  void setSemiMissing(bool Missing = true) { IsSemiMissing = Missing; }
 
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == TopLevelStmt; }

diff  --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index f17a6028a137a..ae67209d9b9e2 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -942,6 +942,9 @@ ANNOTATION(module_end)
 // into the name of a header unit.
 ANNOTATION(header_unit)
 
+// Annotation for end of input in clang-repl.
+ANNOTATION(repl_input_end)
+
 #undef PRAGMA_ANNOTATION
 #undef ANNOTATION
 #undef TESTING_KEYWORD

diff  --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index fc892d71b51bf..17aa11b74add1 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -18,6 +18,7 @@
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/Specifiers.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/DeclSpec.h"
@@ -692,7 +693,8 @@ class Parser : public CodeCompletionHandler {
   bool isEofOrEom() {
     tok::TokenKind Kind = Tok.getKind();
     return Kind == tok::eof || Kind == tok::annot_module_begin ||
-           Kind == tok::annot_module_end || Kind == tok::annot_module_include;
+           Kind == tok::annot_module_end || Kind == tok::annot_module_include ||
+           Kind == tok::annot_repl_input_end;
   }
 
   /// Checks if the \p Level is valid for use in a fold expression.

diff  --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index ffa85e523c038..1b262d9e6f7cb 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -663,7 +663,8 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
   // them.
   if (Tok.is(tok::eof) ||
       (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
-       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
+       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
+       !Tok.is(tok::annot_repl_input_end)))
     return;
 
   // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -819,6 +820,9 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
       PP.Lex(Tok);
       continue;
+    } else if (Tok.is(tok::annot_repl_input_end)) {
+      PP.Lex(Tok);
+      continue;
     } else if (Tok.is(tok::eod)) {
       // Don't print end of directive tokens, since they are typically newlines
       // that mess up our line tracking. These come from unknown pre-processor

diff  --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp
index 373e2844b4e4c..2b932623a5fea 100644
--- a/clang/lib/Interpreter/IncrementalParser.cpp
+++ b/clang/lib/Interpreter/IncrementalParser.cpp
@@ -158,8 +158,8 @@ IncrementalParser::ParseOrWrapTopLevelDecl() {
   LastPTU.TUPart = C.getTranslationUnitDecl();
 
   // Skip previous eof due to last incremental input.
-  if (P->getCurToken().is(tok::eof)) {
-    P->ConsumeToken();
+  if (P->getCurToken().is(tok::annot_repl_input_end)) {
+    P->ConsumeAnyToken();
     // FIXME: Clang does not call ExitScope on finalizing the regular TU, we
     // might want to do that around HandleEndOfTranslationUnit.
     P->ExitScope();
@@ -259,14 +259,14 @@ IncrementalParser::Parse(llvm::StringRef input) {
     Token Tok;
     do {
       PP.Lex(Tok);
-    } while (Tok.isNot(tok::eof));
+    } while (Tok.isNot(tok::annot_repl_input_end));
+  } else {
+    Token AssertTok;
+    PP.Lex(AssertTok);
+    assert(AssertTok.is(tok::annot_repl_input_end) &&
+           "Lexer must be EOF when starting incremental parse!");
   }
 
-  Token AssertTok;
-  PP.Lex(AssertTok);
-  assert(AssertTok.is(tok::eof) &&
-         "Lexer must be EOF when starting incremental parse!");
-
   if (CodeGenerator *CG = getCodeGen(Act.get())) {
     std::unique_ptr<llvm::Module> M(CG->ReleaseModule());
     CG->StartModule("incr_module_" + std::to_string(PTUs.size()),

diff  --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index be6128e557f40..e82e47317127b 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -535,13 +535,19 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
       return LeavingSubmodule;
     }
   }
-
   // If this is the end of the main file, form an EOF token.
   assert(CurLexer && "Got EOF but no current lexer set!");
   const char *EndPos = getCurLexerEndPos();
   Result.startToken();
   CurLexer->BufferPtr = EndPos;
-  CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+
+  if (isIncrementalProcessingEnabled()) {
+    CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_repl_input_end);
+    Result.setAnnotationEndLoc(Result.getLocation());
+    Result.setAnnotationValue(nullptr);
+  } else {
+    CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+  }
 
   if (isCodeCompletionEnabled()) {
     // Inserting the code-completion point increases the source buffer by 1,

diff  --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 43a0ea4f0ed77..4951eb9aa2802 100644
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -838,6 +838,7 @@ bool Parser::ConsumeAndStoreUntil(tok::TokenKind T1, tok::TokenKind T2,
     case tok::annot_module_begin:
     case tok::annot_module_end:
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
       // Ran out of tokens.
       return false;
 
@@ -1244,6 +1245,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
     case tok::annot_module_begin:
     case tok::annot_module_end:
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
       // Ran out of tokens.
       return false;
 

diff  --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 92fa7d8a8a759..dc93b10f80c42 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2070,6 +2070,7 @@ void Parser::SkipMalformedDecl() {
     case tok::annot_module_begin:
     case tok::annot_module_end:
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
       return;
 
     default:
@@ -5456,6 +5457,13 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() {
 
   SmallVector<Decl *, 2> DeclsInGroup;
   DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(R.get()));
+
+  if (Tok.is(tok::annot_repl_input_end) &&
+      Tok.getAnnotationValue() != nullptr) {
+    ConsumeAnnotationToken();
+    cast<TopLevelStmtDecl>(DeclsInGroup.back())->setSemiMissing();
+  }
+
   // Currently happens for things like  -fms-extensions and use `__if_exists`.
   for (Stmt *S : Stmts)
     DeclsInGroup.push_back(Actions.ActOnTopLevelStmtDecl(S));

diff  --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index b423c5c47051f..bde9df0877382 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -544,9 +544,22 @@ StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) {
     return ParseCaseStatement(StmtCtx, /*MissingCase=*/true, Expr);
   }
 
-  // Otherwise, eat the semicolon.
-  ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
-  return handleExprStmt(Expr, StmtCtx);
+  Token *CurTok = nullptr;
+  // If the semicolon is missing at the end of REPL input, consider if
+  // we want to do value printing. Note this is only enabled in C++ mode
+  // since part of the implementation requires C++ language features.
+  // Note we shouldn't eat the token since the callback needs it.
+  if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus)
+    CurTok = &Tok;
+  else
+    // Otherwise, eat the semicolon.
+    ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
+
+  StmtResult R = handleExprStmt(Expr, StmtCtx);
+  if (CurTok && !R.isInvalid())
+    CurTok->setAnnotationValue(R.get());
+
+  return R;
 }
 
 /// ParseSEHTryBlockCommon

diff  --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index c0f4556d743b2..bebc3d6d7c4dc 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -320,6 +320,7 @@ bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
     case tok::annot_module_begin:
     case tok::annot_module_end:
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
       // Stop before we change submodules. They generally indicate a "good"
       // place to pick up parsing again (except in the special case where
       // we're trying to skip to EOF).
@@ -614,11 +615,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
                                Sema::ModuleImportState &ImportState) {
   DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(*this);
 
-  // Skip over the EOF token, flagging end of previous input for incremental
-  // processing
-  if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
-    ConsumeToken();
-
   Result = nullptr;
   switch (Tok.getKind()) {
   case tok::annot_pragma_unused:
@@ -697,6 +693,7 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     return false;
 
   case tok::eof:
+  case tok::annot_repl_input_end:
     // Check whether -fmax-tokens= was reached.
     if (PP.getMaxTokens() != 0 && PP.getTokenCount() > PP.getMaxTokens()) {
       PP.Diag(Tok.getLocation(), diag::warn_max_tokens_total)


        


More information about the cfe-commits mailing list