[clang] [C++20][Modules] Implement P1857R3 Modules Dependency Discovery (PR #107168)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 3 17:00:39 PDT 2024
https://github.com/yronglin created https://github.com/llvm/llvm-project/pull/107168
None
>From fb028015b8f86f87b6d1643e91c21531c768a198 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Sat, 17 Aug 2024 16:54:26 +0800
Subject: [PATCH 1/2] [Clang] Add peekNextPPToken, makes peek next token
without side-effects
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/include/clang/Lex/Lexer.h | 10 ++++----
clang/include/clang/Lex/Preprocessor.h | 8 ++++++-
clang/include/clang/Lex/TokenLexer.h | 7 +++---
clang/lib/Lex/Lexer.cpp | 21 +++++++++--------
clang/lib/Lex/PPMacroExpansion.cpp | 32 ++++++++++++--------------
clang/lib/Lex/TokenLexer.cpp | 10 ++++----
6 files changed, 46 insertions(+), 42 deletions(-)
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index b6ecc7e5ded9e2..1e665c13b392f2 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
//===--------------------------------------------------------------------===//
// Context that changes as the file is lexed.
// NOTE: any state that mutates when in raw mode must have save/restore code
- // in Lexer::isNextPPTokenLParen.
+ // in Lexer::peekNextPPToken.
// BufferPtr - Current pointer into the buffer. This is the next character
// to be lexed.
@@ -629,10 +629,10 @@ class Lexer : public PreprocessorLexer {
BufferPtr = TokEnd;
}
- /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
- /// tok::l_paren token, 0 if it is something else and 2 if there are no more
- /// tokens in the buffer controlled by this lexer.
- unsigned isNextPPTokenLParen();
+ /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+ /// buffer controlled by this lexer, otherwise return the next unexpanded
+ /// token.
+ std::optional<Token> peekNextPPToken();
//===--------------------------------------------------------------------===//
// Lexer character reading interfaces.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 1307659e27d137..0ab138974aeb20 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2650,10 +2650,16 @@ class Preprocessor {
void removeCachedMacroExpandedTokensOfLastLexer();
+ /// Peek the next token. If there is one, return it; if not, this
+ /// method should have no observable side-effect on the lexed tokens.
+ std::optional<Token> peekNextPPToken();
+
/// Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
- bool isNextPPTokenLParen();
+ bool isNextPPTokenLParen() {
+ return peekNextPPToken().value_or(Token{}).is(tok::l_paren);
+ }
/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation. Returns null on error.
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 4d229ae6106743..777b4e6266c714 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -139,10 +139,9 @@ class TokenLexer {
void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
bool OwnsTokens, bool IsReinject);
- /// If the next token lexed will pop this macro off the
- /// expansion stack, return 2. If the next unexpanded token is a '(', return
- /// 1, otherwise return 0.
- unsigned isNextTokenLParen() const;
+ /// If the next token lexed will pop this macro off the expansion stack,
+ /// return std::nullopt, otherwise return the next unexpanded token.
+ std::optional<Token> peekNextPPToken() const;
/// Lex and return a token from this macro stream.
bool Lex(Token &Tok);
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index ef1e1f4bd9aeb4..af533b3874cf5d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3193,18 +3193,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
return PP->HandleEndOfFile(Result, isPragmaLexer());
}
-/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
-/// the specified lexer will return a tok::l_paren token, 0 if it is something
-/// else and 2 if there are no more tokens in the buffer controlled by the
-/// lexer.
-unsigned Lexer::isNextPPTokenLParen() {
+/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+/// buffer controlled by this lexer, otherwise return the next unexpanded
+/// token.
+std::optional<Token> Lexer::peekNextPPToken() {
assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
if (isDependencyDirectivesLexer()) {
if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
- return 2;
- return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
- tok::l_paren);
+ return std::nullopt;
+ Token Result;
+ (void)convertDependencyDirectiveToken(
+ DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
+ return Result;
}
// Switch to 'skipping' mode. This will ensure that we can lex a token
@@ -3233,8 +3234,8 @@ unsigned Lexer::isNextPPTokenLParen() {
LexingRawMode = false;
if (Tok.is(tok::eof))
- return 2;
- return Tok.is(tok::l_paren);
+ return std::nullopt;
+ return Tok;
}
/// Find the end of a version control conflict marker.
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 1d671ab72b0c03..0daa7fa96f89b1 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -437,42 +437,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
return !llvm::is_contained(MI->params(), II);
}
-/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
-/// lexed is a '('. If so, consume the token and return true, if not, this
-/// method should have no observable side-effect on the lexed tokens.
-bool Preprocessor::isNextPPTokenLParen() {
+/// peekNextPPToken - Peek the next token. If there is one, return it; if not,
+/// this method should have no observable side-effect on the lexed tokens.
+std::optional<Token> Preprocessor::peekNextPPToken() {
// Do some quick tests for rejection cases.
- unsigned Val;
+ std::optional<Token> Val;
if (CurLexer)
- Val = CurLexer->isNextPPTokenLParen();
+ Val = CurLexer->peekNextPPToken();
else
- Val = CurTokenLexer->isNextTokenLParen();
+ Val = CurTokenLexer->peekNextPPToken();
- if (Val == 2) {
+ if (!Val) {
// We have run off the end. If it's a source file we don't
// examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
// macro stack.
if (CurPPLexer)
- return false;
+ return std::nullopt;
for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
if (Entry.TheLexer)
- Val = Entry.TheLexer->isNextPPTokenLParen();
+ Val = Entry.TheLexer->peekNextPPToken();
else
- Val = Entry.TheTokenLexer->isNextTokenLParen();
+ Val = Entry.TheTokenLexer->peekNextPPToken();
- if (Val != 2)
+ if (Val)
break;
// Ran off the end of a source file?
if (Entry.ThePPLexer)
- return false;
+ return std::nullopt;
}
}
- // Okay, if we know that the token is a '(', lex it and return. Otherwise we
- // have found something that isn't a '(' or we found the end of the
- // translation unit. In either case, return false.
- return Val == 1;
+ // Okay, return the token we found, or std::nullopt if we reached the end of
+ // the translation unit.
+ return Val;
}
/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 856d5682727fe3..0eca09ef93da92 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -922,13 +922,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
}
/// isNextTokenLParen - If the next token lexed will pop this macro off the
-/// expansion stack, return 2. If the next unexpanded token is a '(', return
-/// 1, otherwise return 0.
-unsigned TokenLexer::isNextTokenLParen() const {
+/// expansion stack, return std::nullopt, otherwise return the next unexpanded
+/// token.
+std::optional<Token> TokenLexer::peekNextPPToken() const {
// Out of tokens?
if (isAtEnd())
- return 2;
- return Tokens[CurTokenIdx].is(tok::l_paren);
+ return std::nullopt;
+ return Tokens[CurTokenIdx];
}
/// isParsingPreprocessorDirective - Return true if we are in the middle of a
>From b7b4e2549a5fb723b455768f2bcf75bfab6cab04 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Wed, 4 Sep 2024 00:57:52 +0800
Subject: [PATCH 2/2] [C++20][Modules] Implement P1857R3 Modules Dependency
Discovery
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
.../include/clang/Basic/DiagnosticLexKinds.td | 6 +-
.../clang/Basic/DiagnosticParseKinds.td | 4 +-
clang/include/clang/Basic/IdentifierTable.h | 27 +-
clang/include/clang/Basic/TokenKinds.def | 6 +
clang/include/clang/Lex/Preprocessor.h | 87 +++++-
clang/include/clang/Lex/Token.h | 7 +
clang/include/clang/Parse/Parser.h | 4 -
clang/lib/Basic/IdentifierTable.cpp | 4 +-
clang/lib/Lex/DependencyDirectivesScanner.cpp | 28 +-
clang/lib/Lex/Lexer.cpp | 34 +-
clang/lib/Lex/PPDirectives.cpp | 214 ++++++++++++-
clang/lib/Lex/Preprocessor.cpp | 293 +++++-------------
clang/lib/Lex/TokenLexer.cpp | 3 +-
clang/lib/Parse/Parser.cpp | 65 +---
.../Lex/DependencyDirectivesScannerTest.cpp | 10 +-
15 files changed, 483 insertions(+), 309 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205ee9..afd56fb44b2e71 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -477,7 +477,7 @@ def warn_cxx98_compat_variadic_macro : Warning<
def ext_named_variadic_macro : Extension<
"named variadic macros are a GNU extension">, InGroup<VariadicMacros>;
def err_embedded_directive : Error<
- "embedding a #%0 directive within macro arguments is not supported">;
+ "embedding a %select{#|C++ }0%1 directive within macro arguments is not supported">;
def ext_embedded_directive : Extension<
"embedding a directive within macro arguments has undefined behavior">,
InGroup<DiagGroup<"embedded-directive">>;
@@ -952,6 +952,10 @@ def warn_module_conflict : Warning<
InGroup<ModuleConflict>;
// C++20 modules
+def err_module_decl_in_header : Error<
+ "module declaration must not come from an #include directive">;
+def err_pp_cond_span_module_decl : Error<
+ "preprocessor conditionals shall not span a module declaration">;
def err_header_import_semi_in_macro : Error<
"semicolon terminating header import declaration cannot be produced "
"by a macro">;
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 0b8ab4bf092509..c8654f12cb349a 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1695,8 +1695,8 @@ def ext_bit_int : Extension<
} // end of Parse Issue category.
let CategoryName = "Modules Issue" in {
-def err_unexpected_module_decl : Error<
- "module declaration can only appear at the top level">;
+def err_unexpected_module_or_import_decl : Error<
+ "%select{module|import}0 declaration can only appear at the top level">;
def err_module_expected_ident : Error<
"expected a module name after '%select{module|import}0'">;
def err_attribute_not_module_attr : Error<
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index ae9ebd9f59154e..9423676351e378 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsModulesImport : 1;
+ // True if this is the 'module' contextual keyword.
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned IsModulesDecl : 1;
+
// True if this is a mangled OpenMP variant name.
LLVM_PREFERRED_TYPE(bool)
unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsFinal : 1;
- // 22 bits left in a 64-bit word.
+ // 21 bits left in a 64-bit word.
// Managed by the language front-end.
void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
- IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
- IsRestrictExpansion(false), IsFinal(false) {}
+ IsModulesDecl(false), IsMangledOpenMPVariantName(false),
+ IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
public:
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
RecomputeNeedsHandleIdentifier();
}
+ /// Determine whether this is the contextual keyword \c module.
+ bool isModulesDeclaration() const { return IsModulesDecl; }
+
+ /// Set whether this identifier is the contextual keyword \c module.
+ void setModulesDeclaration(bool I) {
+ IsModulesDecl = I;
+ if (I)
+ NeedsHandleIdentifier = true;
+ else
+ RecomputeNeedsHandleIdentifier();
+ }
+
/// Determine whether this is the mangled name of an OpenMP variant.
bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
@@ -737,10 +753,11 @@ class IdentifierTable {
// contents.
II->Entry = &Entry;
- // If this is the 'import' contextual keyword, mark it as such.
+ // If this is the 'import' or 'module' contextual keyword, mark it as such.
if (Name == "import")
II->setModulesImport(true);
-
+ else if (Name == "module")
+ II->setModulesDeclaration(true);
return *II;
}
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 212c1f6ff3a124..6416920539db53 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -129,6 +129,9 @@ PPKEYWORD(pragma)
// C23 & C++26 #embed
PPKEYWORD(embed)
+// C++20 Module Directive
+PPKEYWORD(module)
+
// GNU Extensions.
PPKEYWORD(import)
PPKEYWORD(include_next)
@@ -1014,6 +1017,9 @@ ANNOTATION(module_include)
ANNOTATION(module_begin)
ANNOTATION(module_end)
+// Annotations for C++, Clang and Objective-C named modules.
+ANNOTATION(module_name)
+
// Annotation for a header_name token that has been looked up and transformed
// into the name of a header unit.
ANNOTATION(header_unit)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 0ab138974aeb20..a9acde7ac51df3 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -128,6 +128,70 @@ enum class EmbedResult {
Empty = 2, // Corresponds to __STDC_EMBED_EMPTY__
};
+/// Represents module or partition name token sequence.
+///
+/// module-name:
+/// module-name-qualifier[opt] identifier
+///
+/// partition-name: [C++20]
+/// : module-name-qualifier[opt] identifier
+///
+/// module-name-qualifier
+/// module-name-qualifier[opt] identifier .
+///
+/// This class can only be created by the preprocessor and guarantees that the
+/// two token arrays are contiguous in memory and contain only 3 kinds of
+/// tokens (identifier, '.' and ':'). It is only available when the
+/// preprocessor returns an annot_module_name token.
+///
+/// For example:
+///
+/// export module m.n:c.d
+///
+/// The module name array has 3 tokens ['m', '.', 'n'].
+/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
+///
+/// When importing a partition in a named module fragment (e.g. import
+/// :part1;), the module name array will be empty, and the partition name
+/// array has 2 tokens.
+///
+/// When we meet a private-module-fragment (e.g. module :private;), the
+/// preprocessor will not return an annot_module_name token, but will return 2
+/// separate tokens [':', 'kw_private'].
+class ModuleNameInfo {
+ friend class Preprocessor;
+ ArrayRef<Token> ModuleName;
+ ArrayRef<Token> PartitionName;
+
+ ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
+
+public:
+ /// Return the contiguous token array.
+ ArrayRef<Token> getTokens() const {
+ if (ModuleName.empty())
+ return PartitionName;
+ if (PartitionName.empty())
+ return ModuleName;
+ return ArrayRef(ModuleName.begin(), PartitionName.end());
+ }
+ bool hasModuleName() const { return !ModuleName.empty(); }
+ bool hasPartitionName() const { return !PartitionName.empty(); }
+ ArrayRef<Token> getModuleName() const { return ModuleName; }
+ ArrayRef<Token> getPartitionName() const { return PartitionName; }
+ Token getColonToken() const {
+ assert(hasPartitionName() && "Do not have a partition name");
+ return getPartitionName().front();
+ }
+
+ /// Under the standard C++ Modules, the dot is just part of the module name,
+ /// and not a real hierarchy separator. Flatten such module names now.
+ std::string getFlatName() const;
+
+ void buildNamedModuleIdPath(
+ Preprocessor &P,
+ SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
+};
+
/// Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
@@ -336,6 +400,15 @@ class Preprocessor {
/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;
+
+ struct ExportContextualKeywordInfo {
+ Token ExportTok;
+ bool TokAtPhysicalStartOfLine;
+ };
+
+ /// Whether the last token we lexed was an 'export' keyword.
+ std::optional<ExportContextualKeywordInfo> LastTokenWasExportKeyword =
+ std::nullopt;
/// A position within a C++20 import-seq.
class StdCXXImportSeq {
@@ -1767,6 +1840,17 @@ class Preprocessor {
std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
bool ForHasEmbed);
+ bool LexModuleNameOrHeaderName(Token &Result, bool IsImport);
+ /// Callback invoked when the lexer sees one of the export, import or module
+ /// tokens at the start of a line.
+ ///
+ /// This consumes the import or module directive, modifies the
+ /// lexer/preprocessor state, and advances the lexer(s) so that the next token
+ /// read is the correct one.
+ bool HandleModuleContextualKeyword(Token &Result, bool TokAtPhysicalStartOfLine);
+
+ void HandleModuleDirective(Token &ModuleOrImportKeyword);
+ void LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport);
bool LexAfterModuleImport(Token &Result);
void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
@@ -2344,7 +2428,7 @@ class Preprocessor {
///
/// \return The location of the end of the directive (the terminating
/// newline).
- SourceLocation CheckEndOfDirective(const char *DirType,
+ SourceLocation CheckEndOfDirective(StringRef DirType,
bool EnableMacros = false);
/// Read and discard all tokens remaining on the current line until
@@ -2785,6 +2869,7 @@ class Preprocessor {
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
+ void HandleCXXModuleOrImportDirective(Token &KeywordTok);
void HandleMicrosoftImportDirective(Token &Tok);
public:
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 4f29fb7d114159..8e81207ddf8d7d 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -231,6 +231,9 @@ class Token {
PtrData = const_cast<char*>(Ptr);
}
+ template <class T> T getAnnotationValueAs() const {
+ return static_cast<T>(getAnnotationValue());
+ }
void *getAnnotationValue() const {
assert(isAnnotation() && "Used AnnotVal on non-annotation token");
return PtrData;
@@ -289,6 +292,10 @@ class Token {
/// Return the ObjC keyword kind.
tok::ObjCKeywordKind getObjCKeywordID() const;
+ /// Return true if we have a C++20 Modules contextual keyword (export, import
+ /// or module).
+ bool isModuleContextualKeyword(bool AllowExport = true) const;
+
bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
/// Return true if this token has trigraphs or escaped newlines in it.
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index a7513069ff5da0..2bb1af2e01b527 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler {
mutable IdentifierInfo *Ident_GNU_final;
mutable IdentifierInfo *Ident_override;
- // C++2a contextual keywords.
- mutable IdentifierInfo *Ident_import;
- mutable IdentifierInfo *Ident_module;
-
// C++ type trait keywords that can be reverted to identifiers and still be
// used as type traits.
llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits;
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index c9c9d927a5902e..10ed86d5d5990c 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -326,8 +326,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
if (LangOpts.IEEE128)
AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
- // Add the 'import' contextual keyword.
+ // Add the 'import' and 'module' contextual keywords.
get("import").setModulesImport(true);
+ get("module").setModulesDeclaration(true);
}
/// Checks if the specified token kind represents a keyword in the
@@ -456,6 +457,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
CASE( 6, 'd', 'f', define);
CASE( 6, 'i', 'n', ifndef);
CASE( 6, 'i', 'p', import);
+ CASE( 6, 'm', 'd', module);
CASE( 6, 'p', 'a', pragma);
CASE( 7, 'd', 'f', defined);
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a21..58e92977f99cc2 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -497,21 +497,32 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
for (;;) {
const dependency_directives_scan::Token &Tok = lexToken(First, End);
- if (Tok.is(tok::eof))
+ if (Tok.isOneOf(tok::eof, tok::eod))
return reportError(
DirectiveLoc,
diag::err_dep_source_scanner_missing_semi_after_at_import);
if (Tok.is(tok::semi))
break;
}
+
+ // Skip extra tokens after semi in C++20 Modules directive.
+ bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl ||
+ Kind == DirectiveKind::cxx_export_module_decl ||
+ Kind == DirectiveKind::cxx_import_decl ||
+ Kind == DirectiveKind::cxx_module_decl;
+ if (IsCXXModules)
+ lexPPDirectiveBody(First, End);
pushDirective(Kind);
skipWhitespace(First, End);
if (First == End)
return false;
- if (!isVerticalWhitespace(*First))
- return reportError(
- DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
- skipNewline(First, End);
+ if (!IsCXXModules) {
+ if (!isVerticalWhitespace(*First))
+ return reportError(
+ DirectiveLoc,
+ diag::err_dep_source_scanner_unexpected_tokens_at_import);
+ skipNewline(First, End);
+ }
return false;
}
@@ -846,8 +857,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
- if (*First == 'i' || *First == 'e' || *First == 'm')
- return lexModule(First, End);
+ // if (!LangOpts.CPlusPlusModules && (*First == 'i' || *First == 'e' || *First == 'm'))
+ // return lexModule(First, End);
if (*First == '_') {
if (isNextIdentifierOrSkipLine("_Pragma", First, End))
@@ -860,7 +871,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
TheLexer.setParsingPreprocessorDirective(true);
auto ScEx2 = make_scope_exit(
[&]() { TheLexer.setParsingPreprocessorDirective(false); });
-
+ if (*First == 'i' || *First == 'e' || *First == 'm')
+ return lexModule(First, End);
// Lex '#'.
const dependency_directives_scan::Token &HashTok = lexToken(First, End);
if (HashTok.is(tok::hashhash)) {
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index af533b3874cf5d..9f363e70c66541 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -74,6 +74,19 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
}
+/// Return true if we have a C++20 Modules contextual keyword (export, import
+/// or module).
+bool Token::isModuleContextualKeyword(bool AllowExport) const {
+ if (AllowExport && is(tok::kw_export))
+ return true;
+ if (isOneOf(tok::kw_import, tok::kw_module))
+ return true;
+ if (isNot(tok::identifier))
+ return false;
+ const auto *II = getIdentifierInfo();
+ return II->isModulesImport() || II->isModulesDeclaration();
+}
+
/// Determine whether the token kind starts a simple-type-specifier.
bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
switch (getKind()) {
@@ -3996,11 +4009,17 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
case 'v': case 'w': case 'x': case 'y': case 'z':
- case '_':
+ case '_': {
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexIdentifierContinue(Result, CurPtr);
-
+ bool returnedToken = LexIdentifierContinue(Result, CurPtr);
+ if (returnedToken && Result.isModuleContextualKeyword() &&
+ LangOpts.CPlusPlusModules &&
+ PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine) &&
+ !LexingRawMode && !Is_PragmaLexer)
+ goto HandleDirective;
+ return returnedToken;
+ }
case '$': // $ in identifiers.
if (LangOpts.DollarIdents) {
if (!isLexingRawMode())
@@ -4484,8 +4503,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
HandleDirective:
// We parsed a # character and it's the start of a preprocessing directive.
-
- FormTokenWithChars(Result, CurPtr, tok::hash);
+ if (!Result.isOneOf(tok::kw_import, tok::kw_module))
+ FormTokenWithChars(Result, CurPtr, tok::hash);
PP->HandleDirective(Result);
if (PP->hadModuleLoaderFatalFailure())
@@ -4559,6 +4578,11 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {
Result.setRawIdentifierData(TokPtr);
if (!isLexingRawMode()) {
const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+ if (Result.isModuleContextualKeyword() &&
+ PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) {
+ PP->HandleDirective(Result);
+ return false;
+ }
if (II->isHandleIdentifierCase())
return PP->HandleIdentifier(Result);
}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 4e77df9ec444c7..b40de9d9184048 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -413,7 +413,7 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
/// true, then we consider macros that expand to zero tokens as being ok.
///
/// Returns the location of the end of the directive.
-SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
+SourceLocation Preprocessor::CheckEndOfDirective(StringRef DirType,
bool EnableMacros) {
Token Tmp;
// Lex unexpanded tokens for most directives: macros might expand to zero
@@ -1219,9 +1219,14 @@ void Preprocessor::HandleDirective(Token &Result) {
// Save the '#' token in case we need to return it later.
Token SavedHash = Result;
+ bool IsCXX20ImportOrModuleDirective =
+ getLangOpts().CPlusPlusModules &&
+ Result.isModuleContextualKeyword(/*AllowExport=*/false);
+
// Read the next token, the directive flavor. This isn't expanded due to
// C99 6.10.3p8.
- LexUnexpandedToken(Result);
+ if (!IsCXX20ImportOrModuleDirective)
+ LexUnexpandedToken(Result);
// C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
// #define A(x) #x
@@ -1240,7 +1245,9 @@ void Preprocessor::HandleDirective(Token &Result) {
case tok::pp___include_macros:
case tok::pp_pragma:
case tok::pp_embed:
- Diag(Result, diag::err_embedded_directive) << II->getName();
+ case tok::pp_module:
+ Diag(Result, diag::err_embedded_directive)
+ << IsCXX20ImportOrModuleDirective << II->getName();
Diag(*ArgMacro, diag::note_macro_expansion_here)
<< ArgMacro->getIdentifierInfo();
DiscardUntilEndOfDirective();
@@ -1331,9 +1338,12 @@ void Preprocessor::HandleDirective(Token &Result) {
// C99 6.10.6 - Pragma Directive.
case tok::pp_pragma:
return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
-
+ case tok::pp_module:
+ return HandleCXXModuleOrImportDirective(Result);
// GNU Extensions.
case tok::pp_import:
+ if (IsCXX20ImportOrModuleDirective)
+ return HandleCXXModuleOrImportDirective(Result);
return HandleImportDirective(SavedHash.getLocation(), Result);
case tok::pp_include_next:
return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
@@ -4012,3 +4022,199 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
*Params);
HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
}
+
+void Preprocessor::LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport) {
+ Token Result;
+ Suffix.clear();
+Retry:
+ if (IsImport && getLangOpts().CPlusPlusModules) {
+ (void) LexHeaderName(Result);
+ if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
+ std::string Name = ModuleDeclState.getPrimaryName().str();
+ Name += ":";
+ NamedModuleImportPath.push_back(
+ {getIdentifierInfo(Name), Result.getLocation()});
+ }
+ } else {
+ Lex(Result);
+ }
+
+ Suffix.push_back(Result);
+ if (Result.isOneOf(tok::eof, tok::eod))
+ return;
+ if (Result.is(tok::code_completion))
+ goto Retry;
+
+ // Lex a module name
+ if (Result.isOneOf(tok::identifier, tok::colon)) {
+ bool ExpectsIdentifier = Result.is(tok::colon);
+ if (Result.is(tok::identifier))
+ NamedModuleImportPath.push_back(
+ std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
+ while (true) {
+ Lex(Result);
+ Suffix.push_back(Result);
+ if (ExpectsIdentifier && Result.is(tok::identifier)) {
+ ExpectsIdentifier = false;
+ // We expected to see an identifier here, and we did; continue handling
+ // identifiers.
+ NamedModuleImportPath.push_back(
+ std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
+ continue;
+ }
+
+ if (!ExpectsIdentifier && Result.is(tok::period)) {
+ ExpectsIdentifier = true;
+ continue;
+ }
+
+ // Module partitions are only allowed in C++20 Modules.
+ // FIXME: Should we accept partition here for error recovery?
+ if (!ExpectsIdentifier && Result.is(tok::colon)) {
+ ExpectsIdentifier = true;
+ continue;
+ }
+ break;
+ }
+
+ // Under the standard C++ Modules, the dot is just part of the module name,
+ // and not a real hierarchy separator. Flatten such module names now.
+ //
+ // FIXME: Is this the right level to be performing this transformation?
+ std::string FlatModuleName;
+ if (getLangOpts().CPlusPlusModules) {
+ for (auto &Piece : NamedModuleImportPath) {
+ // If the FlatModuleName ends with a colon, it implies it is a partition.
+ if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
+ FlatModuleName += ".";
+ FlatModuleName += Piece.first->getName();
+ }
+ SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
+ NamedModuleImportPath.clear();
+ NamedModuleImportPath.push_back(
+ std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+ }
+ }
+
+ // Consume the pp-import-suffix and expand any macros in it now, if we're not
+ // at the semicolon already.
+ SourceLocation SemiLoc = Result.getLocation();
+ if (Result.isNot(tok::semi)) {
+ CollectPpImportSuffix(Suffix);
+ if (Suffix.back().isNot(tok::semi))
+ return;
+ SemiLoc = Suffix.back().getLocation();
+ }
+
+ if (!IsImport)
+ return;
+
+ if (getLangOpts().CPlusPlusModules && Suffix.front().is(tok::colon) &&
+ !ModuleDeclState.isNamedModule())
+ return;
+
+ // C++2a [cpp.module]p1:
+ // The ';' preprocessing-token terminating a pp-import shall not have
+ // been produced by macro replacement.
+ if (getLangOpts().CPlusPlusModules && SemiLoc.isMacroID())
+ Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+ // Check for a header-name.
+ if (IsImport && Result.is(tok::header_name)) {
+ // Reconstitute the import token.
+ Token ImportTok;
+ ImportTok.startToken();
+ ImportTok.setKind(tok::kw_import);
+ ImportTok.setLocation(ModuleImportLoc);
+ ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+ ImportTok.setLength(6);
+
+ auto Action = HandleHeaderIncludeOrImport(
+ /*HashLoc*/ SourceLocation(), ImportTok, Result, SemiLoc);
+ switch (Action.Kind) {
+ case ImportAction::None:
+ break;
+
+ case ImportAction::ModuleBegin:
+ // Let the parser know we're textually entering the module.
+ Suffix.emplace_back();
+ Suffix.back().startToken();
+ Suffix.back().setKind(tok::annot_module_begin);
+ Suffix.back().setLocation(SemiLoc);
+ Suffix.back().setAnnotationEndLoc(SemiLoc);
+ Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+ [[fallthrough]];
+
+ case ImportAction::ModuleImport:
+ case ImportAction::HeaderUnitImport:
+ case ImportAction::SkippedModuleImport:
+ // We chose to import (or textually enter) the file. Convert the
+ // header-name token into a header unit annotation token.
+ Suffix[0].setKind(tok::annot_header_unit);
+ Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+ Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+ // FIXME: Call the moduleImport callback?
+ break;
+ case ImportAction::Failure:
+ assert(TheModuleLoader.HadFatalFailure &&
+ "This should be an early exit only to a fatal error");
+ Result.setKind(tok::eof);
+ Suffix.clear();
+ Suffix.push_back(Result);
+ CurLexer->cutOffLexing();
+ return;
+ }
+ return;
+ }
+
+ // Check module name
+ Module *Imported = nullptr;
+ // We don't/shouldn't load the standard C++20 modules when preprocessing.
+ if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
+ Imported = TheModuleLoader.loadModule(ModuleImportLoc,
+ NamedModuleImportPath,
+ Module::Hidden,
+ /*IsInclusionDirective=*/false);
+ if (Imported)
+ makeModuleVisible(Imported, SemiLoc);
+ }
+
+ if (Callbacks)
+ Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
+}
+
+void Preprocessor::HandleCXXModuleOrImportDirective(Token &ModuleTok) {
+ assert(ModuleTok.isOneOf(tok::kw_import, tok::kw_module));
+ SourceLocation DirectiveStartLoc =
+ LastTokenWasExportKeyword
+ ? LastTokenWasExportKeyword->ExportTok.getLocation()
+ : ModuleTok.getLocation();
+ bool IsImport = ModuleTok.is(tok::kw_import);
+ SmallVector<Token, 32> Suffix;
+ LexAfterModuleImport(Suffix, IsImport);
+
+ if (!Suffix.empty() && Suffix.back().is(tok::semi))
+ CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName());
+
+ if (ModuleTok.is(tok::kw_module)) {
+ if (!IncludeMacroStack.empty()) {
+ Diag(DirectiveStartLoc, diag::err_module_decl_in_header)
+ << SourceRange(DirectiveStartLoc, Suffix.back().getLocation());
+ }
+
+ if (CurPPLexer->getConditionalStackDepth() != 0) {
+ Diag(DirectiveStartLoc, diag::err_pp_cond_span_module_decl)
+ << SourceRange(DirectiveStartLoc, Suffix.back().getLocation());
+ }
+ }
+
+ unsigned NumTokens = Suffix.size() + 1;
+ // Allocate a holding buffer for a sequence of tokens and introduce it into
+ // the token stream.
+ auto ToksCopy = std::make_unique<Token[]>(NumTokens);
+ ToksCopy[0] = ModuleTok;
+ std::copy(Suffix.begin(), Suffix.end(), &ToksCopy[1]);
+ EnterTokenStream(std::move(ToksCopy), NumTokens,
+ /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+ return;
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index f0b4593e0cc22e..eb694ce41566fb 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -905,6 +905,7 @@ void Preprocessor::Lex(Token &Result) {
// This token is injected to represent the translation of '#include "a.h"'
// into "import a.h;". Mimic the notional ';'.
case tok::annot_module_include:
+ case tok::annot_repl_input_end:
case tok::semi:
TrackGMFState.handleSemi();
StdCXXImportSeqState.handleSemi();
@@ -925,27 +926,20 @@ void Preprocessor::Lex(Token &Result) {
case tok::period:
ModuleDeclState.handlePeriod();
break;
- case tok::identifier:
- // Check "import" and "module" when there is no open bracket. The two
- // identifiers are not meaningful with open brackets.
+ case tok::kw_module:
+ TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
+ ModuleDeclState.handleModule();
+ break;
+ case tok::kw_import:
if (StdCXXImportSeqState.atTopLevel()) {
- if (Result.getIdentifierInfo()->isModulesImport()) {
- TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
- StdCXXImportSeqState.handleImport();
- if (StdCXXImportSeqState.afterImportSeq()) {
- ModuleImportLoc = Result.getLocation();
- NamedModuleImportPath.clear();
- IsAtImport = false;
- ModuleImportExpectsIdentifier = true;
- CurLexerCallback = CLK_LexAfterModuleImport;
- }
- break;
- } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
- TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
- ModuleDeclState.handleModule();
- break;
- }
+ TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
+ StdCXXImportSeqState.handleImport();
}
+ // ModuleImportLoc = Result.getLocation();
+ // NamedModuleImportPath.clear();
+ // IsAtImport = false;
+ break;
+ case tok::identifier:
ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
if (ModuleDeclState.isModuleCandidate())
break;
@@ -964,6 +958,8 @@ void Preprocessor::Lex(Token &Result) {
}
LastTokenWasAt = Result.is(tok::at);
+ if (Result.isNot(tok::kw_export))
+ LastTokenWasExportKeyword.reset();
--LexLevel;
if ((LexLevel == 0 || PreprocessToken) &&
@@ -1111,7 +1107,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
if (BracketDepth == 0)
return;
break;
-
+ case tok::eod:
case tok::eof:
return;
@@ -1121,213 +1117,82 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
}
}
-
-/// Lex a token following the 'import' contextual keyword.
-///
-/// pp-import: [C++20]
-/// import header-name pp-import-suffix[opt] ;
-/// import header-name-tokens pp-import-suffix[opt] ;
-/// [ObjC] @ import module-name ;
-/// [Clang] import module-name ;
-///
-/// header-name-tokens:
-/// string-literal
-/// < [any sequence of preprocessing-tokens other than >] >
-///
-/// module-name:
-/// module-name-qualifier[opt] identifier
-///
-/// module-name-qualifier
-/// module-name-qualifier[opt] identifier .
+/// P1857R3: Modules Dependency Discovery
///
-/// We respond to a pp-import by importing macros from the named module.
-bool Preprocessor::LexAfterModuleImport(Token &Result) {
- // Figure out what kind of lexer we actually have.
- recomputeCurLexerKind();
+/// At the start of phase 4, an 'import' or 'module' token is treated as
+/// starting a directive and is converted to its respective keyword iff it:
+///   - after skipping horizontal whitespace, is
+///     - at the start of a logical line, or
+///     - preceded by an 'export' at the start of the logical line, and
+///   - is followed by an identifier pp-token (before macro expansion), or
+///     - '<', '"', or ':' (but not '::') pp-tokens for 'import', or
+///     - ';' for 'module'.
+/// Otherwise the token is treated as an identifier.
+bool Preprocessor::HandleModuleContextualKeyword(
+ Token &Result, bool TokAtPhysicalStartOfLine) {
+ if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
+ return false;
- // Lex the next token. The header-name lexing rules are used at the start of
- // a pp-import.
- //
- // For now, we only support header-name imports in C++20 mode.
- // FIXME: Should we allow this in all language modes that support an import
- // declaration as an extension?
- if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
- if (LexHeaderName(Result))
- return true;
-
- if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
- std::string Name = ModuleDeclState.getPrimaryName().str();
- Name += ":";
- NamedModuleImportPath.push_back(
- {getIdentifierInfo(Name), Result.getLocation()});
- CurLexerCallback = CLK_LexAfterModuleImport;
- return true;
- }
- } else {
- Lex(Result);
+ if (Result.is(tok::kw_export)) {
+ LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine};
+ return false;
}
- // Allocate a holding buffer for a sequence of tokens and introduce it into
- // the token stream.
- auto EnterTokens = [this](ArrayRef<Token> Toks) {
- auto ToksCopy = std::make_unique<Token[]>(Toks.size());
- std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
- EnterTokenStream(std::move(ToksCopy), Toks.size(),
- /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
- };
-
- bool ImportingHeader = Result.is(tok::header_name);
- // Check for a header-name.
- SmallVector<Token, 32> Suffix;
- if (ImportingHeader) {
- // Enter the header-name token into the token stream; a Lex action cannot
- // both return a token and cache tokens (doing so would corrupt the token
- // cache if the call to Lex comes from CachingLex / PeekAhead).
- Suffix.push_back(Result);
-
- // Consume the pp-import-suffix and expand any macros in it now. We'll add
- // it back into the token stream later.
- CollectPpImportSuffix(Suffix);
- if (Suffix.back().isNot(tok::semi)) {
- // This is not a pp-import after all.
- EnterTokens(Suffix);
+ if (LastTokenWasExportKeyword) {
+ auto Export = *LastTokenWasExportKeyword;
+ LastTokenWasExportKeyword.reset();
+ // The export keyword was not at the start of the line, so it's not a
+ // directive-introducing token.
+ if (!Export.TokAtPhysicalStartOfLine)
return false;
- }
-
- // C++2a [cpp.module]p1:
- // The ';' preprocessing-token terminating a pp-import shall not have
- // been produced by macro replacement.
- SourceLocation SemiLoc = Suffix.back().getLocation();
- if (SemiLoc.isMacroID())
- Diag(SemiLoc, diag::err_header_import_semi_in_macro);
-
- // Reconstitute the import token.
- Token ImportTok;
- ImportTok.startToken();
- ImportTok.setKind(tok::kw_import);
- ImportTok.setLocation(ModuleImportLoc);
- ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
- ImportTok.setLength(6);
-
- auto Action = HandleHeaderIncludeOrImport(
- /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
- switch (Action.Kind) {
- case ImportAction::None:
- break;
-
- case ImportAction::ModuleBegin:
- // Let the parser know we're textually entering the module.
- Suffix.emplace_back();
- Suffix.back().startToken();
- Suffix.back().setKind(tok::annot_module_begin);
- Suffix.back().setLocation(SemiLoc);
- Suffix.back().setAnnotationEndLoc(SemiLoc);
- Suffix.back().setAnnotationValue(Action.ModuleForHeader);
- [[fallthrough]];
-
- case ImportAction::ModuleImport:
- case ImportAction::HeaderUnitImport:
- case ImportAction::SkippedModuleImport:
- // We chose to import (or textually enter) the file. Convert the
- // header-name token into a header unit annotation token.
- Suffix[0].setKind(tok::annot_header_unit);
- Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
- Suffix[0].setAnnotationValue(Action.ModuleForHeader);
- // FIXME: Call the moduleImport callback?
- break;
- case ImportAction::Failure:
- assert(TheModuleLoader.HadFatalFailure &&
- "This should be an early exit only to a fatal error");
- Result.setKind(tok::eof);
- CurLexer->cutOffLexing();
- EnterTokens(Suffix);
- return true;
- }
-
- EnterTokens(Suffix);
+ // [cpp.pre]/1.4
+ // export // not a preprocessing directive
+ // import foo; // preprocessing directive (ill-formed at phase
+ // 7)
+ if (TokAtPhysicalStartOfLine)
+ return false;
+ } else if (!TokAtPhysicalStartOfLine)
return false;
- }
- // The token sequence
- //
- // import identifier (. identifier)*
- //
- // indicates a module import directive. We already saw the 'import'
- // contextual keyword, so now we're looking for the identifiers.
- if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
- // We expected to see an identifier here, and we did; continue handling
- // identifiers.
- NamedModuleImportPath.push_back(
- std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
- ModuleImportExpectsIdentifier = false;
- CurLexerCallback = CLK_LexAfterModuleImport;
- return true;
- }
-
- // If we're expecting a '.' or a ';', and we got a '.', then wait until we
- // see the next identifier. (We can also see a '[[' that begins an
- // attribute-specifier-seq here under the Standard C++ Modules.)
- if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
- ModuleImportExpectsIdentifier = true;
- CurLexerCallback = CLK_LexAfterModuleImport;
- return true;
- }
-
- // If we didn't recognize a module name at all, this is not a (valid) import.
- if (NamedModuleImportPath.empty() || Result.is(tok::eof))
+ bool SavedParsingPreprocessorDirective = CurPPLexer->ParsingPreprocessorDirective;
+ CurPPLexer->ParsingPreprocessorDirective = true;
+ // Peek next token.
+ auto NextTok = peekNextPPToken().value_or(Token{});
+ CurPPLexer->ParsingPreprocessorDirective = SavedParsingPreprocessorDirective;
+ if (Result.getIdentifierInfo()->isModulesImport() &&
+ NextTok.isOneOf(tok::raw_identifier, tok::less, tok::string_literal,
+ tok::colon)) {
+ Result.setKind(tok::kw_import);
+ NamedModuleImportPath.clear();
+ ModuleImportLoc = Result.getLocation();
+ IsAtImport = false;
return true;
-
- // Consume the pp-import-suffix and expand any macros in it now, if we're not
- // at the semicolon already.
- SourceLocation SemiLoc = Result.getLocation();
- if (Result.isNot(tok::semi)) {
- Suffix.push_back(Result);
- CollectPpImportSuffix(Suffix);
- if (Suffix.back().isNot(tok::semi)) {
- // This is not an import after all.
- EnterTokens(Suffix);
- return false;
- }
- SemiLoc = Suffix.back().getLocation();
}
-
- // Under the standard C++ Modules, the dot is just part of the module name,
- // and not a real hierarchy separator. Flatten such module names now.
- //
- // FIXME: Is this the right level to be performing this transformation?
- std::string FlatModuleName;
- if (getLangOpts().CPlusPlusModules) {
- for (auto &Piece : NamedModuleImportPath) {
- // If the FlatModuleName ends with colon, it implies it is a partition.
- if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
- FlatModuleName += ".";
- FlatModuleName += Piece.first->getName();
- }
- SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
+ if (Result.getIdentifierInfo()->isModulesDeclaration() &&
+ NextTok.isOneOf(tok::raw_identifier, tok::colon, tok::semi)) {
+ Result.setKind(tok::kw_module);
NamedModuleImportPath.clear();
- NamedModuleImportPath.push_back(
- std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
- }
-
- Module *Imported = nullptr;
- // We don't/shouldn't load the standard c++20 modules when preprocessing.
- if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
- Imported = TheModuleLoader.loadModule(ModuleImportLoc,
- NamedModuleImportPath,
- Module::Hidden,
- /*IsInclusionDirective=*/false);
- if (Imported)
- makeModuleVisible(Imported, SemiLoc);
+ return true;
}
- if (Callbacks)
- Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
+ // Ok, it's an identifier.
+ return false;
+}
- if (!Suffix.empty()) {
- EnterTokens(Suffix);
- return false;
- }
- return true;
+bool Preprocessor::LexAfterModuleImport(Token &Result) {
+ recomputeCurLexerKind();
+ // Allocate a holding buffer for a sequence of tokens and introduce it into
+ // the token stream.
+ auto EnterTokens = [this](ArrayRef<Token> Toks) {
+ auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+ std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+ EnterTokenStream(std::move(ToksCopy), Toks.size(),
+ /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+ };
+ SmallVector<Token, 32> Suffix;
+ LexAfterModuleImport(Suffix, true);
+ EnterTokens(Suffix);
+ return false;
}
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 0eca09ef93da92..8fc39a5c517b81 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -700,7 +700,8 @@ bool TokenLexer::Lex(Token &Tok) {
HasLeadingSpace = false;
// Handle recursive expansion!
- if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
+ if (!Tok.isAnnotation() && !Tok.isModuleContextualKeyword() &&
+ Tok.getIdentifierInfo() != nullptr) {
// Change the kind of this identifier to the appropriate token kind, e.g.
// turning "for" into a keyword.
IdentifierInfo *II = Tok.getIdentifierInfo();
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 04c2f1d380bc48..6d3312684c1f90 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -514,8 +514,6 @@ void Parser::Initialize() {
Ident_abstract = nullptr;
Ident_override = nullptr;
Ident_GNU_final = nullptr;
- Ident_import = nullptr;
- Ident_module = nullptr;
Ident_super = &PP.getIdentifierTable().get("super");
@@ -571,11 +569,6 @@ void Parser::Initialize() {
PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block);
}
- if (getLangOpts().CPlusPlusModules) {
- Ident_import = PP.getIdentifierInfo("import");
- Ident_module = PP.getIdentifierInfo("module");
- }
-
Actions.Initialize();
// Prime the lexer look-ahead.
@@ -639,30 +632,12 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
switch (NextToken().getKind()) {
case tok::kw_module:
goto module_decl;
-
- // Note: no need to handle kw_import here. We only form kw_import under
- // the Standard C++ Modules, and in that case 'export import' is parsed as
- // an export-declaration containing an import-declaration.
-
- // Recognize context-sensitive C++20 'export module' and 'export import'
- // declarations.
- case tok::identifier: {
- IdentifierInfo *II = NextToken().getIdentifierInfo();
- if ((II == Ident_module || II == Ident_import) &&
- GetLookAheadToken(2).isNot(tok::coloncolon)) {
- if (II == Ident_module)
- goto module_decl;
- else
- goto import_decl;
- }
- break;
- }
-
+ case tok::kw_import:
+ goto import_decl;
default:
break;
}
break;
-
case tok::kw_module:
module_decl:
Result = ParseModuleDecl(ImportState);
@@ -725,22 +700,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
Actions.ActOnEndOfTranslationUnit();
//else don't tell Sema that we ended parsing: more input might come.
return true;
-
- case tok::identifier:
- // C++2a [basic.link]p3:
- // A token sequence beginning with 'export[opt] module' or
- // 'export[opt] import' and not immediately followed by '::'
- // is never interpreted as the declaration of a top-level-declaration.
- if ((Tok.getIdentifierInfo() == Ident_module ||
- Tok.getIdentifierInfo() == Ident_import) &&
- NextToken().isNot(tok::coloncolon)) {
- if (Tok.getIdentifierInfo() == Ident_module)
- goto module_decl;
- else
- goto import_decl;
- }
- break;
-
default:
break;
}
@@ -956,14 +915,6 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs,
};
Actions.CodeCompletion().CodeCompleteOrdinaryName(getCurScope(), PCC);
return nullptr;
- case tok::kw_import: {
- Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
- if (getLangOpts().CPlusPlusModules) {
- llvm_unreachable("not expecting a c++20 import here");
- ProhibitAttributes(Attrs);
- }
- SingleDecl = ParseModuleImport(SourceLocation(), IS);
- } break;
case tok::kw_export:
if (getLangOpts().CPlusPlusModules || getLangOpts().HLSL) {
ProhibitAttributes(Attrs);
@@ -1048,9 +999,10 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs,
case tok::kw___if_not_exists:
ParseMicrosoftIfExistsExternalDeclaration();
return nullptr;
-
+ case tok::kw_import:
case tok::kw_module:
- Diag(Tok, diag::err_unexpected_module_decl);
+ Diag(Tok, diag::err_unexpected_module_or_import_decl)
+ << Tok.is(tok::kw_import);
SkipUntil(tok::semi);
return nullptr;
@@ -2462,10 +2414,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
? Sema::ModuleDeclKind::Interface
: Sema::ModuleDeclKind::Implementation;
- assert(
- (Tok.is(tok::kw_module) ||
- (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) &&
- "not a module declaration");
+ assert(Tok.is(tok::kw_module) && "not a module declaration");
SourceLocation ModuleLoc = ConsumeToken();
// Attributes appear after the module name, not before.
@@ -2557,7 +2506,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
SourceLocation ExportLoc;
TryConsumeToken(tok::kw_export, ExportLoc);
- assert((AtLoc.isInvalid() ? Tok.isOneOf(tok::kw_import, tok::identifier)
+ assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import)
: Tok.isObjCAtKeyword(tok::objc_import)) &&
"Improper start to module import");
bool IsObjCAtImport = Tok.isObjCAtKeyword(tok::objc_import);
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118c..49c005b245d207 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -986,11 +986,11 @@ ort \
ASSERT_FALSE(
minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
- "exp\\\nort import:l[[rename]];"
- "import<<=3;import a b d e d e f e;"
- "import foo[[no_unique_address]];import foo();"
- "import f(:sefse);import f(->a=3);"
- "<TokBeforeEOF>\n",
+ "\nexp\\\nort import:l[[rename]];"
+ "\nimport<<=3;\nimport a b d e d e f e;"
+ "\nimport foo[[no_unique_address]];\nimport foo();"
+ "\nimport f(:sefse);\nimport f(->a=3);"
+ "\n<TokBeforeEOF>\n",
Out.data());
ASSERT_EQ(Directives.size(), 11u);
EXPECT_EQ(Directives[0].Kind, pp_include);
More information about the cfe-commits
mailing list