[llvm-branch-commits] [clang] release/21.x: [clang] Allow trivial pp-directives before C++ module directive (#153641) (PR #154077)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 18 01:47:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport e6e874ce8f055f5b8c5d7f8c7fb0afe764d1d350
Requested by: @<!-- -->yronglin
---
Patch is 49.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154077.diff
14 Files Affected:
- (modified) clang/include/clang/Lex/Lexer.h (-3)
- (added) clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h (+310)
- (modified) clang/include/clang/Lex/Preprocessor.h (+12)
- (modified) clang/include/clang/Lex/Token.h (+9-8)
- (modified) clang/include/clang/Sema/Sema.h (+1-1)
- (modified) clang/lib/Lex/Lexer.cpp (-9)
- (modified) clang/lib/Lex/Preprocessor.cpp (+37-3)
- (modified) clang/lib/Parse/Parser.cpp (+5-3)
- (modified) clang/lib/Sema/SemaModule.cpp (+4-2)
- (modified) clang/test/CXX/module/cpp.pre/module_decl.cpp (+140-1)
- (modified) clang/unittests/Lex/CMakeLists.txt (+1)
- (modified) clang/unittests/Lex/LexerTest.cpp (+2-2)
- (modified) clang/unittests/Lex/ModuleDeclStateTest.cpp (+59-69)
- (added) clang/unittests/Lex/NoTrivialPPDirectiveTracerTest.cpp (+182)
``````````diff
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 06971ff87ab96..423f2ffe2f852 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -143,9 +143,6 @@ class Lexer : public PreprocessorLexer {
/// True if this is the first time we're lexing the input file.
bool IsFirstTimeLexingFile;
- /// True if current lexing token is the first pp-token.
- bool IsFirstPPToken;
-
// NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n',
// it also points to '\n.'
const char *NewLinePtr;
diff --git a/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
new file mode 100644
index 0000000000000..9ab3c6a528a1a
--- /dev/null
+++ b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
@@ -0,0 +1,310 @@
+//===--- NoTrivialPPDirectiveTracer.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the NoTrivialPPDirectiveTracer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+#define LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+
+#include "clang/Lex/PPCallbacks.h"
+
+namespace clang {
+class Preprocessor;
+
+/// Consider the following code:
+///
+/// # 1 __FILE__ 1 3
+/// export module a;
+///
+/// According to the wording in
+/// [P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html):
+///
+/// A module directive may only appear as the first preprocessing tokens in a
+/// file (excluding the global module fragment.)
+///
+/// and the wording in
+/// [[cpp.pre]](https://eel.is/c++draft/cpp.pre#nt:module-file):
+/// module-file:
+/// pp-global-module-fragment[opt] pp-module group[opt]
+/// pp-private-module-fragment[opt]
+///
+/// `#` is the first pp-token in the translation unit, and it was rejected by
+/// clang, but they really should be exempted from this rule. The goal is to not
+/// allow any preprocessor conditionals or most state changes, but these don't
+/// fit that.
+///
+/// State change would mean most semantically observable preprocessor state,
+/// particularly anything that is order dependent. Global flags like being a
+/// system header/module shouldn't matter.
+///
+/// We should exempt a brunch of directives, even though it violates the current
+/// standard wording.
+///
+/// This class used to trace 'no-trivial' pp-directives in main file, which may
+/// change the preprocessing state.
+///
+/// FIXME: Once the wording of the standard is revised, we need to follow the
+/// wording of the standard. Currently this is just a workaround
+class NoTrivialPPDirectiveTracer : public PPCallbacks {
+ Preprocessor &PP;
+
+ /// Whether preprocessing main file. We only focus on the main file.
+ bool InMainFile = true;
+
+ /// Whether one or more conditional, include or other 'no-trivial'
+ /// pp-directives has seen before.
+ bool SeenNoTrivialPPDirective = false;
+
+ void setSeenNoTrivialPPDirective();
+
+public:
+ NoTrivialPPDirectiveTracer(Preprocessor &P) : PP(P) {}
+
+ bool hasSeenNoTrivialPPDirective() const;
+
+ /// Callback invoked whenever the \p Lexer moves to a different file for
+ /// lexing. Unlike \p FileChanged line number directives and other related
+ /// pragmas do not trigger callbacks to \p LexedFileChanged.
+ ///
+ /// \param FID The \p FileID that the \p Lexer moved to.
+ ///
+ /// \param Reason Whether the \p Lexer entered a new file or exited one.
+ ///
+ /// \param FileType The \p CharacteristicKind of the file the \p Lexer moved
+ /// to.
+ ///
+ /// \param PrevFID The \p FileID the \p Lexer was using before the change.
+ ///
+ /// \param Loc The location where the \p Lexer entered a new file from or the
+ /// location that the \p Lexer moved into after exiting a file.
+ void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType, FileID PrevFID,
+ SourceLocation Loc) override;
+
+ /// Callback invoked whenever an embed directive has been processed,
+ /// regardless of whether the embed will actually find a file.
+ ///
+ /// \param HashLoc The location of the '#' that starts the embed directive.
+ ///
+ /// \param FileName The name of the file being included, as written in the
+ /// source code.
+ ///
+ /// \param IsAngled Whether the file name was enclosed in angle brackets;
+ /// otherwise, it was enclosed in quotes.
+ ///
+ /// \param File The actual file that may be included by this embed directive.
+ ///
+ /// \param Params The parameters used by the directive.
+ void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+ OptionalFileEntryRef File,
+ const LexEmbedParametersResult &Params) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Callback invoked whenever an inclusion directive of
+ /// any kind (\c \#include, \c \#import, etc.) has been processed, regardless
+ /// of whether the inclusion will actually result in an inclusion.
+ ///
+ /// \param HashLoc The location of the '#' that starts the inclusion
+ /// directive.
+ ///
+ /// \param IncludeTok The token that indicates the kind of inclusion
+ /// directive, e.g., 'include' or 'import'.
+ ///
+ /// \param FileName The name of the file being included, as written in the
+ /// source code.
+ ///
+ /// \param IsAngled Whether the file name was enclosed in angle brackets;
+ /// otherwise, it was enclosed in quotes.
+ ///
+ /// \param FilenameRange The character range of the quotes or angle brackets
+ /// for the written file name.
+ ///
+ /// \param File The actual file that may be included by this inclusion
+ /// directive.
+ ///
+ /// \param SearchPath Contains the search path which was used to find the file
+ /// in the file system. If the file was found via an absolute include path,
+ /// SearchPath will be empty. For framework includes, the SearchPath and
+ /// RelativePath will be split up. For example, if an include of "Some/Some.h"
+ /// is found via the framework path
+ /// "path/to/Frameworks/Some.framework/Headers/Some.h", SearchPath will be
+ /// "path/to/Frameworks/Some.framework/Headers" and RelativePath will be
+ /// "Some.h".
+ ///
+ /// \param RelativePath The path relative to SearchPath, at which the include
+ /// file was found. This is equal to FileName except for framework includes.
+ ///
+ /// \param SuggestedModule The module suggested for this header, if any.
+ ///
+ /// \param ModuleImported Whether this include was translated into import of
+ /// \p SuggestedModule.
+ ///
+ /// \param FileType The characteristic kind, indicates whether a file or
+ /// directory holds normal user code, system code, or system code which is
+ /// implicitly 'extern "C"' in C++ mode.
+ ///
+ void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+ StringRef FileName, bool IsAngled,
+ CharSourceRange FilenameRange,
+ OptionalFileEntryRef File, StringRef SearchPath,
+ StringRef RelativePath, const Module *SuggestedModule,
+ bool ModuleImported,
+ SrcMgr::CharacteristicKind FileType) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Callback invoked whenever there was an explicit module-import
+ /// syntax.
+ ///
+ /// \param ImportLoc The location of import directive token.
+ ///
+ /// \param Path The identifiers (and their locations) of the module
+ /// "path", e.g., "std.vector" would be split into "std" and "vector".
+ ///
+ /// \param Imported The imported module; can be null if importing failed.
+ ///
+ void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
+ const Module *Imported) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Callback invoked when the end of the main file is reached.
+ ///
+ /// No subsequent callbacks will be made.
+ void EndOfMainFile() override { setSeenNoTrivialPPDirective(); }
+
+ /// Callback invoked when start reading any pragma directive.
+ void PragmaDirective(SourceLocation Loc,
+ PragmaIntroducerKind Introducer) override {}
+
+ /// Called by Preprocessor::HandleMacroExpandedIdentifier when a
+ /// macro invocation is found.
+ void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
+ SourceRange Range, const MacroArgs *Args) override;
+
+ /// Hook called whenever a macro definition is seen.
+ void MacroDefined(const Token &MacroNameTok,
+ const MacroDirective *MD) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever a macro \#undef is seen.
+ /// \param MacroNameTok The active Token
+ /// \param MD A MacroDefinition for the named macro.
+ /// \param Undef New MacroDirective if the macro was defined, null otherwise.
+ ///
+ /// MD is released immediately following this callback.
+ void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD,
+ const MacroDirective *Undef) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever the 'defined' operator is seen.
+ /// \param MD The MacroDirective if the name was a macro, null otherwise.
+ void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
+ SourceRange Range) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#if is seen.
+ /// \param Loc the source location of the directive.
+ /// \param ConditionRange The SourceRange of the expression being tested.
+ /// \param ConditionValue The evaluated value of the condition.
+ ///
+ // FIXME: better to pass in a list (or tree!) of Tokens.
+ void If(SourceLocation Loc, SourceRange ConditionRange,
+ ConditionValueKind ConditionValue) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#elif is seen.
+ /// \param Loc the source location of the directive.
+ /// \param ConditionRange The SourceRange of the expression being tested.
+ /// \param ConditionValue The evaluated value of the condition.
+ /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive.
+ // FIXME: better to pass in a list (or tree!) of Tokens.
+ void Elif(SourceLocation Loc, SourceRange ConditionRange,
+ ConditionValueKind ConditionValue, SourceLocation IfLoc) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#ifdef is seen.
+ /// \param Loc the source location of the directive.
+ /// \param MacroNameTok Information on the token being tested.
+ /// \param MD The MacroDefinition if the name was a macro, null otherwise.
+ void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#elifdef branch is taken.
+ /// \param Loc the source location of the directive.
+ /// \param MacroNameTok Information on the token being tested.
+ /// \param MD The MacroDefinition if the name was a macro, null otherwise.
+ void Elifdef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) override {
+ setSeenNoTrivialPPDirective();
+ }
+ /// Hook called whenever an \#elifdef is skipped.
+ /// \param Loc the source location of the directive.
+ /// \param ConditionRange The SourceRange of the expression being tested.
+ /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive.
+ // FIXME: better to pass in a list (or tree!) of Tokens.
+ void Elifdef(SourceLocation Loc, SourceRange ConditionRange,
+ SourceLocation IfLoc) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#ifndef is seen.
+ /// \param Loc the source location of the directive.
+ /// \param MacroNameTok Information on the token being tested.
+ /// \param MD The MacroDefiniton if the name was a macro, null otherwise.
+ void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#elifndef branch is taken.
+ /// \param Loc the source location of the directive.
+ /// \param MacroNameTok Information on the token being tested.
+ /// \param MD The MacroDefinition if the name was a macro, null otherwise.
+ void Elifndef(SourceLocation Loc, const Token &MacroNameTok,
+ const MacroDefinition &MD) override {
+ setSeenNoTrivialPPDirective();
+ }
+ /// Hook called whenever an \#elifndef is skipped.
+ /// \param Loc the source location of the directive.
+ /// \param ConditionRange The SourceRange of the expression being tested.
+ /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive.
+ // FIXME: better to pass in a list (or tree!) of Tokens.
+ void Elifndef(SourceLocation Loc, SourceRange ConditionRange,
+ SourceLocation IfLoc) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#else is seen.
+ /// \param Loc the source location of the directive.
+ /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive.
+ void Else(SourceLocation Loc, SourceLocation IfLoc) override {
+ setSeenNoTrivialPPDirective();
+ }
+
+ /// Hook called whenever an \#endif is seen.
+ /// \param Loc the source location of the directive.
+ /// \param IfLoc the source location of the \#if/\#ifdef/\#ifndef directive.
+ void Endif(SourceLocation Loc, SourceLocation IfLoc) override {
+ setSeenNoTrivialPPDirective();
+ }
+};
+
+} // namespace clang
+
+#endif // LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4d82e20e5d4f3..e90564a9739a5 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -82,6 +82,7 @@ class PreprocessorLexer;
class PreprocessorOptions;
class ScratchBuffer;
class TargetInfo;
+class NoTrivialPPDirectiveTracer;
namespace Builtin {
class Context;
@@ -353,6 +354,11 @@ class Preprocessor {
/// First pp-token source location in current translation unit.
SourceLocation FirstPPTokenLoc;
+ /// A preprocessor directive tracer to trace whether the preprocessing
+ /// state changed. These changes would mean most semantically observable
+ /// preprocessor state, particularly anything that is order dependent.
+ NoTrivialPPDirectiveTracer *DirTracer = nullptr;
+
/// A position within a C++20 import-seq.
class StdCXXImportSeq {
public:
@@ -609,6 +615,8 @@ class Preprocessor {
return State == NamedModuleImplementation && !getName().contains(':');
}
+ bool isNotAModuleDecl() const { return State == NotAModuleDecl; }
+
StringRef getName() const {
assert(isNamedModule() && "Can't get name from a non named module");
return Name;
@@ -3087,6 +3095,10 @@ class Preprocessor {
bool setDeserializedSafeBufferOptOutMap(
const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
+ /// Whether we've seen pp-directives which may have changed the preprocessing
+ /// state.
+ bool hasSeenNoTrivialPPDirective() const;
+
private:
/// Helper functions to forward lexing to the actual lexer. They all share the
/// same signature.
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index fc43e72593b94..d9dc5a562d802 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -86,12 +86,12 @@ class Token {
// macro stringizing or charizing operator.
CommaAfterElided = 0x200, // The comma following this token was elided (MS).
IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
-
- IsReinjected = 0x800, // A phase 4 token that was produced before and
- // re-added, e.g. via EnterTokenStream. Annotation
- // tokens are *not* reinjected.
- FirstPPToken = 0x1000, // This token is the first pp token in the
- // translation unit.
+ IsReinjected = 0x800, // A phase 4 token that was produced before and
+ // re-added, e.g. via EnterTokenStream. Annotation
+ // tokens are *not* reinjected.
+ HasSeenNoTrivialPPDirective =
+ 0x1000, // Whether we've seen any 'no-trivial' pp-directives before
+ // current position.
};
tok::TokenKind getKind() const { return Kind; }
@@ -321,8 +321,9 @@ class Token {
/// lexer uses identifier tokens to represent placeholders.
bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }
- /// Returns true if this token is the first pp-token.
- bool isFirstPPToken() const { return getFlag(FirstPPToken); }
+ bool hasSeenNoTrivialPPDirective() const {
+ return getFlag(HasSeenNoTrivialPPDirective);
+ }
};
/// Information about the conditional stack (\#if directives)
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b331acbe606b7..7b0be368d67f9 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9836,7 +9836,7 @@ class Sema final : public SemaBase {
SourceLocation ModuleLoc, ModuleDeclKind MDK,
ModuleIdPath Path, ModuleIdPath Partition,
ModuleImportState &ImportState,
- bool IntroducerIsFirstPPToken);
+ bool SeenNoTrivialPPDirective);
/// The parser has processed a global-module-fragment declaration that begins
/// the definition of the global module fragment of the current module unit.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 1f695b4a8676c..b282a600c0e56 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -174,8 +174,6 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
ExtendedTokenMode = 0;
NewLinePtr = nullptr;
-
- IsFirstPPToken = true;
}
/// Lexer constructor - Create a new lexer object for the specified buffer
@@ -3225,7 +3223,6 @@ std::optional<Token> Lexer::peekNextPPToken() {
bool atStartOfLine = IsAtStartOfLine;
bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
bool leadingSpace = HasLeadingSpace;
- bool isFirstPPToken = IsFirstPPToken;
Token Tok;
Lex(Tok);
@@ -3236,7 +3233,6 @@ std::optional<Token> Lexer::peekNextPPToken() {
HasLeadingSpace = leadingSpace;
IsAtStartOfLine = atStartOfLine;
IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
- IsFirstPPToken = isFirstPPToken;
// Restore the lexer back to non-skipping mode.
LexingRawMode = false;
@@ -3726,11 +3722,6 @@ bool Lexer::Lex(Token &Result) {
HasLeadingEmptyMacro = false;
}
- if (IsFirstPPToken) {
- Result.setFlag(Token::FirstPPToken);
- IsFirstPPToken = false;
- }
-
bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
IsAtPhysicalStartOfLine = false;
bool isRawLex = isLexingRawMode();
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index bcd3ea60ce3da..2120e45dd8f8a 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -43,6 +43,7 @@
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/NoTrivialPPDirectiveTracer.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorLexer.h"
@@ -247,8 +248,6 @@ void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << " [LeadingSpace]";
if (Tok.isExpandDisabled())
llvm::errs() << " [ExpandDisabled]";
- if (Tok.isFirstPPToken())
- llvm::errs() << " [First pp-token]";
if (To...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/154077
More information about the llvm-branch-commits
mailing list