[clang] 0529a34 - [clang][Preprocessor] Handle the first pp-token in EnterMainSourceFile (#145244)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 25 17:49:46 PDT 2025
Author: yronglin
Date: 2025-06-26T08:49:43+08:00
New Revision: 0529a346007cecab95c6820a60cb3e4e36f34990
URL: https://github.com/llvm/llvm-project/commit/0529a346007cecab95c6820a60cb3e4e36f34990
DIFF: https://github.com/llvm/llvm-project/commit/0529a346007cecab95c6820a60cb3e4e36f34990.diff
LOG: [clang][Preprocessor] Handle the first pp-token in EnterMainSourceFile (#145244)
Depends on [[clang][Preprocessor] Add peekNextPPToken, makes look ahead
next token without
side-effects](https://github.com/llvm/llvm-project/pull/143898).
This PR fixes the performance regression that was introduced in
https://github.com/llvm/llvm-project/pull/144233.
The original PR (https://github.com/llvm/llvm-project/pull/144233) handled
the first pp-token in the main source file in the macro
definition/expansion paths and in `Lexer::Lex`. Because the lexer is almost
always on the hot path, this could cause a performance regression. In this PR,
we handle the first pp-token in `Preprocessor::EnterMainSourceFile` instead.
---------
Signed-off-by: yronglin <yronglin777 at gmail.com>
Added:
Modified:
clang/include/clang/Lex/Preprocessor.h
clang/include/clang/Lex/TokenLexer.h
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/PPDirectives.cpp
clang/lib/Lex/PPMacroExpansion.cpp
clang/lib/Lex/Preprocessor.cpp
clang/lib/Sema/SemaModule.cpp
clang/unittests/Lex/LexerTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index dae12a6015439..4d82e20e5d4f3 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -350,8 +350,8 @@ class Preprocessor {
/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;
- /// First pp-token in current translation unit.
- std::optional<Token> FirstPPToken;
+ /// First pp-token source location in current translation unit.
+ SourceLocation FirstPPTokenLoc;
/// A position within a C++20 import-seq.
class StdCXXImportSeq {
@@ -1769,20 +1769,13 @@ class Preprocessor {
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
bool ForHasEmbed);
- /// Whether the preprocessor already seen the first pp-token in main file.
- bool hasSeenMainFileFirstPPToken() const { return FirstPPToken.has_value(); }
-
- /// Record first pp-token and check if it has a Token::FirstPPToken flag.
- void HandleMainFileFirstPPToken(const Token &Tok) {
- if (!hasSeenMainFileFirstPPToken() && Tok.isFirstPPToken() &&
- SourceMgr.isWrittenInMainFile(Tok.getLocation()))
- FirstPPToken = Tok;
+ /// Get the start location of the first pp-token in main file.
+ SourceLocation getMainFileFirstPPTokenLoc() const {
+ assert(FirstPPTokenLoc.isValid() &&
+ "Did not see the first pp-token in the main file");
+ return FirstPPTokenLoc;
}
- Token getMainFileFirstPPToken() const {
- assert(FirstPPToken && "First main file pp-token doesn't exists");
- return *FirstPPToken;
- }
bool LexAfterModuleImport(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 777b4e6266c71..7ac933d8f9d45 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -139,8 +139,9 @@ class TokenLexer {
void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
bool OwnsTokens, bool IsReinject);
- /// If the next token lexed will pop this macro off the expansion stack,
- /// return std::nullopt, otherwise return the next unexpanded token.
+ /// If TokenLexer::isAtEnd returns true(the next token lexed will pop this
+ /// macro off the expansion stack), return std::nullopt, otherwise return the
+ /// next unexpanded token.
std::optional<Token> peekNextPPToken() const;
/// Lex and return a token from this macro stream.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index f4d16ecce393c..42ea7edf3aaad 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3228,6 +3228,7 @@ std::optional<Token> Lexer::peekNextPPToken() {
bool atStartOfLine = IsAtStartOfLine;
bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
bool leadingSpace = HasLeadingSpace;
+ bool isFirstPPToken = IsFirstPPToken;
Token Tok;
Lex(Tok);
@@ -3238,7 +3239,7 @@ std::optional<Token> Lexer::peekNextPPToken() {
HasLeadingSpace = leadingSpace;
IsAtStartOfLine = atStartOfLine;
IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
-
+ IsFirstPPToken = isFirstPPToken;
// Restore the lexer back to non-skipping mode.
LexingRawMode = false;
@@ -3740,10 +3741,6 @@ bool Lexer::Lex(Token &Result) {
bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
// (After the LexTokenInternal call, the lexer might be destroyed.)
assert((returnedToken || !isRawLex) && "Raw lex must succeed");
-
- if (returnedToken && Result.isFirstPPToken() && PP &&
- !PP->hasSeenMainFileFirstPPToken())
- PP->HandleMainFileFirstPPToken(Result);
return returnedToken;
}
@@ -4547,8 +4544,6 @@ const char *Lexer::convertDependencyDirectiveToken(
Result.setFlag((Token::TokenFlags)DDTok.Flags);
Result.setLength(DDTok.Length);
BufferPtr = TokPtr + DDTok.Length;
- if (PP && !PP->hasSeenMainFileFirstPPToken() && Result.isFirstPPToken())
- PP->HandleMainFileFirstPPToken(Result);
return TokPtr;
}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index b88624b22e622..e6da19d24f1c5 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1242,9 +1242,6 @@ void Preprocessor::HandleDirective(Token &Result) {
// pp-directive.
bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
- if (!hasSeenMainFileFirstPPToken())
- HandleMainFileFirstPPToken(Result);
-
// Save the '#' token in case we need to return it later.
Token SavedHash = Result;
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 709cf3bb87c8e..b8b91e32179af 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -431,9 +431,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
// to disable the optimization in this case.
if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();
- if (!hasSeenMainFileFirstPPToken())
- HandleMainFileFirstPPToken(Identifier);
-
// If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
if (MI->isBuiltinMacro()) {
if (Callbacks)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 500cf6f8400e0..aac3dc9fa54b0 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -566,6 +566,21 @@ void Preprocessor::EnterMainSourceFile() {
// #imported, it won't be re-entered.
if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID))
markIncluded(*FE);
+
+ // Record the first PP token in the main file. This is used to generate
+ // better diagnostics for C++ modules.
+ //
+ // // This is a comment.
+ // #define FOO int // note: add 'module;' to the start of the file
+ // ^ FirstPPToken // to introduce a global module fragment.
+ //
+ // export module M; // error: module declaration must occur
+ // // at the start of the translation unit.
+ if (getLangOpts().CPlusPlusModules) {
+ std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken();
+ if (FirstPPTok && FirstPPTok->isFirstPPToken())
+ FirstPPTokenLoc = FirstPPTok->getLocation();
+ }
}
// Preprocess Predefines to populate the initial preprocessor state.
diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index fe70ce3fba6a5..7c982bcd63d73 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -337,11 +337,9 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
// tokens in a file (excluding the global module fragment.).
if (getLangOpts().CPlusPlusModules && !IntroducerIsFirstPPToken && !SeenGMF) {
Diag(ModuleLoc, diag::err_module_decl_not_at_start);
- SourceLocation BeginLoc = PP.getMainFileFirstPPToken().getLocation();
- if (BeginLoc.isValid()) {
- Diag(BeginLoc, diag::note_global_module_introducer_missing)
- << FixItHint::CreateInsertion(BeginLoc, "module;\n");
- }
+ SourceLocation BeginLoc = PP.getMainFileFirstPPTokenLoc();
+ Diag(BeginLoc, diag::note_global_module_introducer_missing)
+ << FixItHint::CreateInsertion(BeginLoc, "module;\n");
}
// C++23 [module.unit]p1: ... The identifiers module and import shall not
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 33c8abbec35a3..2adb55484be88 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -49,8 +49,7 @@ class LexerTest : public ::testing::Test {
}
std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
- TrivialModuleLoader &ModLoader,
- StringRef PreDefines = {}) {
+ TrivialModuleLoader &ModLoader) {
std::unique_ptr<llvm::MemoryBuffer> Buf =
llvm::MemoryBuffer::getMemBuffer(Source);
SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
@@ -63,7 +62,7 @@ class LexerTest : public ::testing::Test {
/*IILookup =*/nullptr,
/*OwnsHeaderSearch =*/false);
if (!PreDefines.empty())
- PP->setPredefines(PreDefines.str());
+ PP->setPredefines(PreDefines);
PP->Initialize(*Target);
PP->EnterMainSourceFile();
return PP;
@@ -111,6 +110,7 @@ class LexerTest : public ::testing::Test {
std::shared_ptr<TargetOptions> TargetOpts;
IntrusiveRefCntPtr<TargetInfo> Target;
std::unique_ptr<Preprocessor> PP;
+ std::string PreDefines;
};
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
@@ -773,6 +773,7 @@ TEST(LexerPreambleTest, PreambleBounds) {
}
TEST_F(LexerTest, CheckFirstPPToken) {
+ LangOpts.CPlusPlusModules = true;
{
TrivialModuleLoader ModLoader;
auto PP = CreatePP("// This is a comment\n"
@@ -781,9 +782,8 @@ TEST_F(LexerTest, CheckFirstPPToken) {
Token Tok;
PP->Lex(Tok);
EXPECT_TRUE(Tok.is(tok::kw_int));
- EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::kw_int));
+ EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc().isValid());
+ EXPECT_EQ(PP->getMainFileFirstPPTokenLoc(), Tok.getLocation());
}
{
TrivialModuleLoader ModLoader;
@@ -794,24 +794,28 @@ TEST_F(LexerTest, CheckFirstPPToken) {
Token Tok;
PP->Lex(Tok);
EXPECT_TRUE(Tok.is(tok::kw_int));
- EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::hash));
+ EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok,
+ PP->getSourceManager(), PP->getLangOpts(),
+ /*IgnoreWhiteSpace=*/false));
+ EXPECT_TRUE(Tok.isFirstPPToken());
+ EXPECT_TRUE(Tok.is(tok::hash));
}
{
+ PreDefines = "#define FOO int\n";
TrivialModuleLoader ModLoader;
auto PP = CreatePP("// This is a comment\n"
"FOO a;",
- ModLoader, "#define FOO int\n");
+ ModLoader);
Token Tok;
PP->Lex(Tok);
EXPECT_TRUE(Tok.is(tok::kw_int));
- EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken());
- EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::identifier));
- EXPECT_TRUE(
- PP->getMainFileFirstPPToken().getIdentifierInfo()->isStr("FOO"));
+ EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok,
+ PP->getSourceManager(), PP->getLangOpts(),
+ /*IgnoreWhiteSpace=*/false));
+ EXPECT_TRUE(Tok.isFirstPPToken());
+ EXPECT_TRUE(Tok.is(tok::raw_identifier));
+ EXPECT_TRUE(Tok.getRawIdentifier() == "FOO");
}
}
} // anonymous namespace
More information about the cfe-commits
mailing list