[clang] [C++20][Modules] Implement P1857R3 Modules Dependency Discovery (PR #107168)

via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 3 17:00:39 PDT 2024


https://github.com/yronglin created https://github.com/llvm/llvm-project/pull/107168

None
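For context: P1857R3 restricts where module and import declarations may appear
so that the preprocessor can recognize them as directives, letting dependency
scanners discover imports without running full preprocessing. A minimal sketch
of the source forms affected (file and module names below are illustrative
only, not taken from the patch):

    // m.cppm -- hypothetical module partition interface unit
    module;                  // pp-module directive: begins the global module fragment
    #include <cstdio>        // ordinary directive inside the global module fragment
    export module m:part;    // pp-module directive: module 'm', partition 'part'
    import <vector>;         // pp-import directive: header-unit import
    import other.lib;        // pp-import directive: '.' is part of the module name
    import :impl;            // pp-import directive: another partition of module 'm'

    export int f();          // 'export' alone does not introduce a directive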

>From fb028015b8f86f87b6d1643e91c21531c768a198 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Sat, 17 Aug 2024 16:54:26 +0800
Subject: [PATCH 1/2] [Clang] Add peekNextPPToken, makes peek next token
 without side-effects

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Lex/Lexer.h        | 10 ++++----
 clang/include/clang/Lex/Preprocessor.h |  8 ++++++-
 clang/include/clang/Lex/TokenLexer.h   |  7 +++---
 clang/lib/Lex/Lexer.cpp                | 21 +++++++++--------
 clang/lib/Lex/PPMacroExpansion.cpp     | 32 ++++++++++++--------------
 clang/lib/Lex/TokenLexer.cpp           | 10 ++++----
 6 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index b6ecc7e5ded9e2..1e665c13b392f2 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
   //===--------------------------------------------------------------------===//
   // Context that changes as the file is lexed.
   // NOTE: any state that mutates when in raw mode must have save/restore code
-  // in Lexer::isNextPPTokenLParen.
+  // in Lexer::peekNextPPToken.
 
   // BufferPtr - Current pointer into the buffer.  This is the next character
   // to be lexed.
@@ -629,10 +629,10 @@ class Lexer : public PreprocessorLexer {
     BufferPtr = TokEnd;
   }
 
-  /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
-  /// tok::l_paren token, 0 if it is something else and 2 if there are no more
-  /// tokens in the buffer controlled by this lexer.
-  unsigned isNextPPTokenLParen();
+  /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+  /// buffer controlled by this lexer, otherwise return the next unexpanded
+  /// token.
+  std::optional<Token> peekNextPPToken();
 
   //===--------------------------------------------------------------------===//
   // Lexer character reading interfaces.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 1307659e27d137..0ab138974aeb20 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2650,10 +2650,16 @@ class Preprocessor {
 
   void removeCachedMacroExpandedTokensOfLastLexer();
 
+  /// Peek the next token. If one is available, return it; in either case,
+  /// this method should have no observable side-effect on the lexed tokens.
+  std::optional<Token> peekNextPPToken();
+
   /// Determine whether the next preprocessor token to be
   /// lexed is a '('.  If so, consume the token and return true, if not, this
   /// method should have no observable side-effect on the lexed tokens.
-  bool isNextPPTokenLParen();
+  bool isNextPPTokenLParen() {
+    return peekNextPPToken().value_or(Token{}).is(tok::l_paren);
+  }
 
   /// After reading "MACRO(", this method is invoked to read all of the formal
   /// arguments specified for the macro invocation.  Returns null on error.
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 4d229ae6106743..777b4e6266c714 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -139,10 +139,9 @@ class TokenLexer {
   void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
             bool OwnsTokens, bool IsReinject);
 
-  /// If the next token lexed will pop this macro off the
-  /// expansion stack, return 2.  If the next unexpanded token is a '(', return
-  /// 1, otherwise return 0.
-  unsigned isNextTokenLParen() const;
+  /// If the next token lexed will pop this macro off the expansion stack,
+  /// return std::nullopt, otherwise return the next unexpanded token.
+  std::optional<Token> peekNextPPToken() const;
 
   /// Lex and return a token from this macro stream.
   bool Lex(Token &Tok);
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index ef1e1f4bd9aeb4..af533b3874cf5d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3193,18 +3193,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   return PP->HandleEndOfFile(Result, isPragmaLexer());
 }
 
-/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
-/// the specified lexer will return a tok::l_paren token, 0 if it is something
-/// else and 2 if there are no more tokens in the buffer controlled by the
-/// lexer.
-unsigned Lexer::isNextPPTokenLParen() {
+/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+/// buffer controlled by this lexer, otherwise return the next unexpanded
+/// token.
+std::optional<Token> Lexer::peekNextPPToken() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
   if (isDependencyDirectivesLexer()) {
     if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
-      return 2;
-    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
-        tok::l_paren);
+      return std::nullopt;
+    Token Result;
+    (void)convertDependencyDirectiveToken(
+        DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
+    return Result;
   }
 
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
@@ -3233,8 +3234,8 @@ unsigned Lexer::isNextPPTokenLParen() {
   LexingRawMode = false;
 
   if (Tok.is(tok::eof))
-    return 2;
-  return Tok.is(tok::l_paren);
+    return std::nullopt;
+  return Tok;
 }
 
 /// Find the end of a version control conflict marker.
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 1d671ab72b0c03..0daa7fa96f89b1 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -437,42 +437,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
   return !llvm::is_contained(MI->params(), II);
 }
 
-/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
-/// lexed is a '('.  If so, consume the token and return true, if not, this
-/// method should have no observable side-effect on the lexed tokens.
-bool Preprocessor::isNextPPTokenLParen() {
+/// peekNextPPToken - Peek the next token. If one is available, return it;
+/// either way, this method should have no observable side-effect on the lexed tokens.
+std::optional<Token> Preprocessor::peekNextPPToken() {
   // Do some quick tests for rejection cases.
-  unsigned Val;
+  std::optional<Token> Val;
   if (CurLexer)
-    Val = CurLexer->isNextPPTokenLParen();
+    Val = CurLexer->peekNextPPToken();
   else
-    Val = CurTokenLexer->isNextTokenLParen();
+    Val = CurTokenLexer->peekNextPPToken();
 
-  if (Val == 2) {
+  if (!Val) {
     // We have run off the end.  If it's a source file we don't
     // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
     // macro stack.
     if (CurPPLexer)
-      return false;
+      return std::nullopt;
     for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
       if (Entry.TheLexer)
-        Val = Entry.TheLexer->isNextPPTokenLParen();
+        Val = Entry.TheLexer->peekNextPPToken();
       else
-        Val = Entry.TheTokenLexer->isNextTokenLParen();
+        Val = Entry.TheTokenLexer->peekNextPPToken();
 
-      if (Val != 2)
+      if (Val)
         break;
 
       // Ran off the end of a source file?
       if (Entry.ThePPLexer)
-        return false;
+        return std::nullopt;
     }
   }
 
-  // Okay, if we know that the token is a '(', lex it and return.  Otherwise we
-  // have found something that isn't a '(' or we found the end of the
-  // translation unit.  In either case, return false.
-  return Val == 1;
+  // Okay, return the token we found, or std::nullopt if we hit the end of the
+  // translation unit.
+  return Val;
 }
 
 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 856d5682727fe3..0eca09ef93da92 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -922,13 +922,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
 }
 
 /// isNextTokenLParen - If the next token lexed will pop this macro off the
-/// expansion stack, return 2.  If the next unexpanded token is a '(', return
-/// 1, otherwise return 0.
-unsigned TokenLexer::isNextTokenLParen() const {
+/// expansion stack, return std::nullopt, otherwise return the next unexpanded
+/// token.
+std::optional<Token> TokenLexer::peekNextPPToken() const {
   // Out of tokens?
   if (isAtEnd())
-    return 2;
-  return Tokens[CurTokenIdx].is(tok::l_paren);
+    return std::nullopt;
+  return Tokens[CurTokenIdx];
 }
 
 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
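To summarize the first patch: the old three-state unsigned protocol (0 = next
unexpanded token is not '(', 1 = it is '(', 2 = no more tokens in this lexer's
buffer) is replaced by returning the peeked token itself, and
isNextPPTokenLParen() is kept as a thin wrapper. A rough caller-side sketch,
assuming the surrounding Preprocessor/Lexer context from the patch:

    // Inside a Preprocessor member function (sketch, not verbatim from the tree).
    std::optional<Token> Next = CurLexer->peekNextPPToken();
    if (!Next) {
      // Ran off the end of the buffer controlled by this lexer.
    } else if (Next->is(tok::l_paren)) {
      // The next unexpanded token is '('.
    }

    // The existing query becomes a wrapper, exactly as in the patch:
    //   bool isNextPPTokenLParen() {
    //     return peekNextPPToken().value_or(Token{}).is(tok::l_paren);
    //   }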

>From b7b4e2549a5fb723b455768f2bcf75bfab6cab04 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Wed, 4 Sep 2024 00:57:52 +0800
Subject: [PATCH 2/2] [C++20][Modules] Implement P1857R3 Modules Dependency
 Discovery

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 .../include/clang/Basic/DiagnosticLexKinds.td |   6 +-
 .../clang/Basic/DiagnosticParseKinds.td       |   4 +-
 clang/include/clang/Basic/IdentifierTable.h   |  27 +-
 clang/include/clang/Basic/TokenKinds.def      |   6 +
 clang/include/clang/Lex/Preprocessor.h        |  87 +++++-
 clang/include/clang/Lex/Token.h               |   7 +
 clang/include/clang/Parse/Parser.h            |   4 -
 clang/lib/Basic/IdentifierTable.cpp           |   4 +-
 clang/lib/Lex/DependencyDirectivesScanner.cpp |  28 +-
 clang/lib/Lex/Lexer.cpp                       |  34 +-
 clang/lib/Lex/PPDirectives.cpp                | 214 ++++++++++++-
 clang/lib/Lex/Preprocessor.cpp                | 293 +++++-------------
 clang/lib/Lex/TokenLexer.cpp                  |   3 +-
 clang/lib/Parse/Parser.cpp                    |  65 +---
 .../Lex/DependencyDirectivesScannerTest.cpp   |  10 +-
 15 files changed, 483 insertions(+), 309 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205ee9..afd56fb44b2e71 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -477,7 +477,7 @@ def warn_cxx98_compat_variadic_macro : Warning<
 def ext_named_variadic_macro : Extension<
   "named variadic macros are a GNU extension">, InGroup<VariadicMacros>;
 def err_embedded_directive : Error<
-  "embedding a #%0 directive within macro arguments is not supported">;
+  "embedding a %select{#|C++ }0%1 directive within macro arguments is not supported">;
 def ext_embedded_directive : Extension<
   "embedding a directive within macro arguments has undefined behavior">,
   InGroup<DiagGroup<"embedded-directive">>;
@@ -952,6 +952,10 @@ def warn_module_conflict : Warning<
   InGroup<ModuleConflict>;
 
 // C++20 modules
+def err_module_decl_in_header : Error<
+  "module declaration must not come from an #include directive">;
+def err_pp_cond_span_module_decl : Error<
+  "preprocessor conditionals shall not span a module declaration">;
 def err_header_import_semi_in_macro : Error<
   "semicolon terminating header import declaration cannot be produced "
   "by a macro">;
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 0b8ab4bf092509..c8654f12cb349a 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1695,8 +1695,8 @@ def ext_bit_int : Extension<
 } // end of Parse Issue category.
 
 let CategoryName = "Modules Issue" in {
-def err_unexpected_module_decl : Error<
-  "module declaration can only appear at the top level">;
+def err_unexpected_module_or_import_decl : Error<
+  "%select{module|import}0 declaration can only appear at the top level">;
 def err_module_expected_ident : Error<
   "expected a module name after '%select{module|import}0'">;
 def err_attribute_not_module_attr : Error<
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index ae9ebd9f59154e..9423676351e378 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsModulesImport : 1;
 
+  // True if this is the 'module' contextual keyword.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned IsModulesDecl : 1;
+
   // True if this is a mangled OpenMP variant name.
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsFinal : 1;
 
-  // 22 bits left in a 64-bit word.
+  // 21 bits left in a 64-bit word.
 
   // Managed by the language front-end.
   void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
         IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
         IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
         RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
-        IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
-        IsRestrictExpansion(false), IsFinal(false) {}
+        IsModulesDecl(false), IsMangledOpenMPVariantName(false),
+        IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
 
 public:
   IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
       RecomputeNeedsHandleIdentifier();
   }
 
+  /// Determine whether this is the contextual keyword \c module.
+  bool isModulesDeclaration() const { return IsModulesDecl; }
+
+  /// Set whether this identifier is the contextual keyword \c module.
+  void setModulesDeclaration(bool I) {
+    IsModulesDecl = I;
+    if (I)
+      NeedsHandleIdentifier = true;
+    else
+      RecomputeNeedsHandleIdentifier();
+  }
+
   /// Determine whether this is the mangled name of an OpenMP variant.
   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
 
@@ -737,10 +753,11 @@ class IdentifierTable {
     // contents.
     II->Entry = &Entry;
 
-    // If this is the 'import' contextual keyword, mark it as such.
+    // If this is the 'import' or 'module' contextual keyword, mark it as such.
     if (Name == "import")
       II->setModulesImport(true);
-
+    else if (Name == "module")
+      II->setModulesDeclaration(true);
     return *II;
   }
 
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 212c1f6ff3a124..6416920539db53 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -129,6 +129,9 @@ PPKEYWORD(pragma)
 // C23 & C++26 #embed
 PPKEYWORD(embed)
 
+// C++20 Module Directive
+PPKEYWORD(module)
+
 // GNU Extensions.
 PPKEYWORD(import)
 PPKEYWORD(include_next)
@@ -1014,6 +1017,9 @@ ANNOTATION(module_include)
 ANNOTATION(module_begin)
 ANNOTATION(module_end)
 
+// Annotations for C++, Clang and Objective-C named modules.
+ANNOTATION(module_name)
+
 // Annotation for a header_name token that has been looked up and transformed
 // into the name of a header unit.
 ANNOTATION(header_unit)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 0ab138974aeb20..a9acde7ac51df3 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -128,6 +128,70 @@ enum class EmbedResult {
   Empty = 2,    // Corresponds to __STDC_EMBED_EMPTY__
 };
 
+/// Represents a module or partition name token sequence.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier:
+///           module-name-qualifier[opt] identifier .
+///
+/// This class can only be created by the preprocessor. It guarantees that the
+/// two source arrays are contiguous in memory and contain only 3 kinds of
+/// tokens (identifier, '.' and ':'), and it is only available when the
+/// preprocessor returns an annot_module_name token.
+///
+/// For example:
+///
+/// export module m.n:c.d
+///
+/// The module name array has 3 tokens ['m', '.', 'n'].
+/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
+///
+/// When importing a partition in a named module (e.g. import :part1;), the
+/// module name array will be empty, and the partition name array has 2
+/// tokens.
+///
+/// When we encounter a private-module-fragment (e.g. module :private;), the
+/// preprocessor will not return an annot_module_name token; instead it returns
+/// 2 separate tokens [':', 'kw_private'].
+class ModuleNameInfo {
+  friend class Preprocessor;
+  ArrayRef<Token> ModuleName;
+  ArrayRef<Token> PartitionName;
+
+  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
+
+public:
+  /// Return the contiguous token array.
+  ArrayRef<Token> getTokens() const {
+    if (ModuleName.empty())
+      return PartitionName;
+    if (PartitionName.empty())
+      return ModuleName;
+    return ArrayRef(ModuleName.begin(), PartitionName.end());
+  }
+  bool hasModuleName() const { return !ModuleName.empty(); }
+  bool hasPartitionName() const { return !PartitionName.empty(); }
+  ArrayRef<Token> getModuleName() const { return ModuleName; }
+  ArrayRef<Token> getPartitionName() const { return PartitionName; }
+  Token getColonToken() const {
+    assert(hasPartitionName() && "Do not have a partition name");
+    return getPartitionName().front();
+  }
+
+  /// Under the standard C++ Modules, the dot is just part of the module name
+  /// and not a real hierarchy separator; return the flattened module name.
+  std::string getFlatName() const;
+
+  void buildNamedModuleIdPath(
+      Preprocessor &P,
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -336,6 +400,15 @@ class Preprocessor {
 
   /// Whether the last token we lexed was an '@'.
   bool LastTokenWasAt = false;
+  
+  struct ExportContextualKeywordInfo {
+    Token ExportTok;
+    bool TokAtPhysicalStartOfLine;
+  };
+
+  /// The last 'export' keyword we lexed, if any, and whether it was at the physical start of a line.
+  std::optional<ExportContextualKeywordInfo> LastTokenWasExportKeyword =
+      std::nullopt;
 
   /// A position within a C++20 import-seq.
   class StdCXXImportSeq {
@@ -1767,6 +1840,17 @@ class Preprocessor {
   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
                                                              bool ForHasEmbed);
 
+  bool LexModuleNameOrHeaderName(Token &Result, bool IsImport);
+  /// Callback invoked when the lexer sees an export, import or module token
+  /// at the start of a line.
+  ///
+  /// This consumes the import or module directive, modifies the
+  /// lexer/preprocessor state, and advances the lexer(s) so that the next token
+  /// read is the correct one.
+  bool HandleModuleContextualKeyword(Token &Result, bool TokAtPhysicalStartOfLine);
+
+  void HandleModuleDirective(Token &ModuleOrImportKeyword);
+  void LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport);
   bool LexAfterModuleImport(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
@@ -2344,7 +2428,7 @@ class Preprocessor {
   ///
   /// \return The location of the end of the directive (the terminating
   /// newline).
-  SourceLocation CheckEndOfDirective(const char *DirType,
+  SourceLocation CheckEndOfDirective(StringRef DirType,
                                      bool EnableMacros = false);
 
   /// Read and discard all tokens remaining on the current line until
@@ -2785,6 +2869,7 @@ class Preprocessor {
   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
+  void HandleCXXModuleOrImportDirective(Token &KeywordTok);
   void HandleMicrosoftImportDirective(Token &Tok);
 
 public:
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 4f29fb7d114159..8e81207ddf8d7d 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -231,6 +231,9 @@ class Token {
     PtrData = const_cast<char*>(Ptr);
   }
 
+  template <class T> T getAnnotationValueAs() const {
+    return static_cast<T>(getAnnotationValue());
+  }
   void *getAnnotationValue() const {
     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     return PtrData;
@@ -289,6 +292,10 @@ class Token {
   /// Return the ObjC keyword kind.
   tok::ObjCKeywordKind getObjCKeywordID() const;
 
+  /// Return true if we have a C++20 Modules contextual keyword (export, import
+  /// or module).
+  bool isModuleContextualKeyword(bool AllowExport = true) const;
+
   bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
 
   /// Return true if this token has trigraphs or escaped newlines in it.
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index a7513069ff5da0..2bb1af2e01b527 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler {
   mutable IdentifierInfo *Ident_GNU_final;
   mutable IdentifierInfo *Ident_override;
 
-  // C++2a contextual keywords.
-  mutable IdentifierInfo *Ident_import;
-  mutable IdentifierInfo *Ident_module;
-
   // C++ type trait keywords that can be reverted to identifiers and still be
   // used as type traits.
   llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits;
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index c9c9d927a5902e..10ed86d5d5990c 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -326,8 +326,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
   if (LangOpts.IEEE128)
     AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
 
-  // Add the 'import' contextual keyword.
+  // Add the 'import' and 'module' contextual keywords.
   get("import").setModulesImport(true);
+  get("module").setModulesDeclaration(true);
 }
 
 /// Checks if the specified token kind represents a keyword in the
@@ -456,6 +457,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   CASE( 6, 'd', 'f', define);
   CASE( 6, 'i', 'n', ifndef);
   CASE( 6, 'i', 'p', import);
+  CASE( 6, 'm', 'd', module);
   CASE( 6, 'p', 'a', pragma);
 
   CASE( 7, 'd', 'f', defined);
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a21..58e92977f99cc2 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -497,21 +497,32 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
   const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
   for (;;) {
     const dependency_directives_scan::Token &Tok = lexToken(First, End);
-    if (Tok.is(tok::eof))
+    if (Tok.isOneOf(tok::eof, tok::eod))
       return reportError(
           DirectiveLoc,
           diag::err_dep_source_scanner_missing_semi_after_at_import);
     if (Tok.is(tok::semi))
       break;
   }
+
+  // Skip extra tokens after semi in C++20 Modules directive.
+  bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl ||
+                      Kind == DirectiveKind::cxx_export_module_decl ||
+                      Kind == DirectiveKind::cxx_import_decl ||
+                      Kind == DirectiveKind::cxx_module_decl;
+  if (IsCXXModules)
+    lexPPDirectiveBody(First, End);
   pushDirective(Kind);
   skipWhitespace(First, End);
   if (First == End)
     return false;
-  if (!isVerticalWhitespace(*First))
-    return reportError(
-        DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
-  skipNewline(First, End);
+  if (!IsCXXModules) {
+    if (!isVerticalWhitespace(*First))
+      return reportError(
+          DirectiveLoc,
+          diag::err_dep_source_scanner_unexpected_tokens_at_import);
+    skipNewline(First, End);
+  }
   return false;
 }
 
@@ -846,8 +857,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
   if (*First == '@')
     return lexAt(First, End);
 
-  if (*First == 'i' || *First == 'e' || *First == 'm')
-    return lexModule(First, End);
+  // if (!LangOpts.CPlusPlusModules && (*First == 'i' || *First == 'e' || *First == 'm'))
+  //   return lexModule(First, End);
 
   if (*First == '_') {
     if (isNextIdentifierOrSkipLine("_Pragma", First, End))
@@ -860,7 +871,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
   TheLexer.setParsingPreprocessorDirective(true);
   auto ScEx2 = make_scope_exit(
       [&]() { TheLexer.setParsingPreprocessorDirective(false); });
-
+  if (*First == 'i' || *First == 'e' || *First == 'm')
+    return lexModule(First, End);
   // Lex '#'.
   const dependency_directives_scan::Token &HashTok = lexToken(First, End);
   if (HashTok.is(tok::hashhash)) {
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index af533b3874cf5d..9f363e70c66541 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -74,6 +74,19 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
   return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
 }
 
+/// Return true if we have a C++20 Modules contextual keyword (export, import
+/// or module).
+bool Token::isModuleContextualKeyword(bool AllowExport) const {
+  if (AllowExport && is(tok::kw_export))
+    return true;
+  if (isOneOf(tok::kw_import, tok::kw_module))
+    return true;
+  if (isNot(tok::identifier))
+    return false;
+  const auto *II = getIdentifierInfo();
+  return II->isModulesImport() || II->isModulesDeclaration();
+}
+
 /// Determine whether the token kind starts a simple-type-specifier.
 bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
   switch (getKind()) {
@@ -3996,11 +4009,17 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
   case 'o': case 'p': case 'q': case 'r': case 's': case 't':    /*'u'*/
   case 'v': case 'w': case 'x': case 'y': case 'z':
-  case '_':
+  case '_': {
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
-    return LexIdentifierContinue(Result, CurPtr);
-
+    bool returnedToken = LexIdentifierContinue(Result, CurPtr);
+    if (returnedToken && Result.isModuleContextualKeyword() &&
+        LangOpts.CPlusPlusModules &&
+        PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine) &&
+        !LexingRawMode && !Is_PragmaLexer)
+      goto HandleDirective;
+    return returnedToken;
+  }
   case '$':   // $ in identifiers.
     if (LangOpts.DollarIdents) {
       if (!isLexingRawMode())
@@ -4484,8 +4503,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
 
 HandleDirective:
   // We parsed a # character and it's the start of a preprocessing directive.
-
-  FormTokenWithChars(Result, CurPtr, tok::hash);
+  if (!Result.isOneOf(tok::kw_import, tok::kw_module))
+    FormTokenWithChars(Result, CurPtr, tok::hash);
   PP->HandleDirective(Result);
 
   if (PP->hadModuleLoaderFatalFailure())
@@ -4559,6 +4578,11 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {
     Result.setRawIdentifierData(TokPtr);
     if (!isLexingRawMode()) {
       const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+      if (Result.isModuleContextualKeyword() &&
+          PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) {
+        PP->HandleDirective(Result);
+        return false;
+      }
       if (II->isHandleIdentifierCase())
         return PP->HandleIdentifier(Result);
     }
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 4e77df9ec444c7..b40de9d9184048 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -413,7 +413,7 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
 /// true, then we consider macros that expand to zero tokens as being ok.
 ///
 /// Returns the location of the end of the directive.
-SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
+SourceLocation Preprocessor::CheckEndOfDirective(StringRef DirType,
                                                  bool EnableMacros) {
   Token Tmp;
   // Lex unexpanded tokens for most directives: macros might expand to zero
@@ -1219,9 +1219,14 @@ void Preprocessor::HandleDirective(Token &Result) {
   // Save the '#' token in case we need to return it later.
   Token SavedHash = Result;
 
+  bool IsCXX20ImportOrModuleDirective =
+      getLangOpts().CPlusPlusModules &&
+      Result.isModuleContextualKeyword(/*AllowExport=*/false);
+
   // Read the next token, the directive flavor.  This isn't expanded due to
   // C99 6.10.3p8.
-  LexUnexpandedToken(Result);
+  if (!IsCXX20ImportOrModuleDirective)
+    LexUnexpandedToken(Result);
 
   // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
   //   #define A(x) #x
@@ -1240,7 +1245,9 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp___include_macros:
       case tok::pp_pragma:
       case tok::pp_embed:
-        Diag(Result, diag::err_embedded_directive) << II->getName();
+      case tok::pp_module:
+        Diag(Result, diag::err_embedded_directive)
+            << IsCXX20ImportOrModuleDirective << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
         DiscardUntilEndOfDirective();
@@ -1331,9 +1338,12 @@ void Preprocessor::HandleDirective(Token &Result) {
     // C99 6.10.6 - Pragma Directive.
     case tok::pp_pragma:
       return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
-
+    case tok::pp_module:
+      return HandleCXXModuleOrImportDirective(Result);
     // GNU Extensions.
     case tok::pp_import:
+      if (IsCXX20ImportOrModuleDirective)
+        return HandleCXXModuleOrImportDirective(Result);
       return HandleImportDirective(SavedHash.getLocation(), Result);
     case tok::pp_include_next:
       return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
@@ -4012,3 +4022,199 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
                               *Params);
   HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
 }
+
+void Preprocessor::LexAfterModuleImport(SmallVectorImpl<Token> &Suffix, bool IsImport) {
+  Token Result;
+  Suffix.clear();
+Retry:
+  if (IsImport && getLangOpts().CPlusPlusModules) {
+    (void) LexHeaderName(Result);
+    if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
+      std::string Name = ModuleDeclState.getPrimaryName().str();
+      Name += ":";
+      NamedModuleImportPath.push_back(
+          {getIdentifierInfo(Name), Result.getLocation()});
+    }
+  } else {
+    Lex(Result);
+  }
+
+  Suffix.push_back(Result);
+  if (Result.isOneOf(tok::eof, tok::eod))
+    return;
+  if (Result.is(tok::code_completion))
+    goto Retry;
+
+  // Lex a module name
+  if (Result.isOneOf(tok::identifier, tok::colon)) {
+    bool ExpectsIdentifier = Result.is(tok::colon);
+    if (Result.is(tok::identifier))
+      NamedModuleImportPath.push_back(
+          std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
+    while (true) {
+      Lex(Result);
+      Suffix.push_back(Result);
+      if (ExpectsIdentifier && Result.is(tok::identifier)) {
+        ExpectsIdentifier = false;
+        // We expected to see an identifier here, and we did; continue handling
+        // identifiers.
+        NamedModuleImportPath.push_back(
+            std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
+        continue;
+      }
+
+      if (!ExpectsIdentifier && Result.is(tok::period)) {
+        ExpectsIdentifier = true;
+        continue;
+      }
+
+      // Module partition only allowed in C++20 Modules.
+      // FIXME: Should we accept partition here for error recovery?
+      if (!ExpectsIdentifier && Result.is(tok::colon)) {
+        ExpectsIdentifier = true;
+        continue;
+      }
+      break;
+    }
+
+    // Under the standard C++ Modules, the dot is just part of the module name,
+    // and not a real hierarchy separator. Flatten such module names now.
+    //
+    // FIXME: Is this the right level to be performing this transformation?
+    std::string FlatModuleName;
+    if (getLangOpts().CPlusPlusModules) {
+      for (auto &Piece : NamedModuleImportPath) {
+        // If the FlatModuleName ends with colon, it implies it is a partition.
+        if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
+          FlatModuleName += ".";
+        FlatModuleName += Piece.first->getName();
+      }
+      SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
+      NamedModuleImportPath.clear();
+      NamedModuleImportPath.push_back(
+          std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
+    }
+  }
+
+  // Consume the pp-import-suffix and expand any macros in it now, if we're not
+  // at the semicolon already.
+  SourceLocation SemiLoc = Result.getLocation();
+  if (Result.isNot(tok::semi)) {
+    CollectPpImportSuffix(Suffix);
+    if (Suffix.back().isNot(tok::semi))
+      return;
+    SemiLoc = Suffix.back().getLocation();
+  }
+
+  if (!IsImport)
+    return;
+
+  if (getLangOpts().CPlusPlusModules && Suffix.front().is(tok::colon) &&
+      !ModuleDeclState.isNamedModule())
+    return;
+
+  // C++2a [cpp.module]p1:
+  //   The ';' preprocessing-token terminating a pp-import shall not have
+  //   been produced by macro replacement.
+  if (getLangOpts().CPlusPlusModules && SemiLoc.isMacroID())
+    Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+  // Check for a header-name.
+  if (IsImport && Result.is(tok::header_name)) {
+    // Reconstitute the import token.
+    Token ImportTok;
+    ImportTok.startToken();
+    ImportTok.setKind(tok::kw_import);
+    ImportTok.setLocation(ModuleImportLoc);
+    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+    ImportTok.setLength(6);
+
+    auto Action = HandleHeaderIncludeOrImport(
+        /*HashLoc*/ SourceLocation(), ImportTok, Result, SemiLoc);
+    switch (Action.Kind) {
+    case ImportAction::None:
+      break;
+
+    case ImportAction::ModuleBegin:
+      // Let the parser know we're textually entering the module.
+      Suffix.emplace_back();
+      Suffix.back().startToken();
+      Suffix.back().setKind(tok::annot_module_begin);
+      Suffix.back().setLocation(SemiLoc);
+      Suffix.back().setAnnotationEndLoc(SemiLoc);
+      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+      [[fallthrough]];
+
+    case ImportAction::ModuleImport:
+    case ImportAction::HeaderUnitImport:
+    case ImportAction::SkippedModuleImport:
+      // We chose to import (or textually enter) the file. Convert the
+      // header-name token into a header unit annotation token.
+      Suffix[0].setKind(tok::annot_header_unit);
+      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+      // FIXME: Call the moduleImport callback?
+      break;
+    case ImportAction::Failure:
+      assert(TheModuleLoader.HadFatalFailure &&
+             "This should be an early exit only to a fatal error");
+      Result.setKind(tok::eof);
+      Suffix.clear();
+      Suffix.push_back(Result);
+      CurLexer->cutOffLexing();
+      return;
+    }
+    return;
+  }
+
+  // Check module name
+  Module *Imported = nullptr;
+  // We don't/shouldn't load the standard c++20 modules when preprocessing.
+  if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
+    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
+                                          NamedModuleImportPath,
+                                          Module::Hidden,
+                                          /*IsInclusionDirective=*/false);
+    if (Imported)
+      makeModuleVisible(Imported, SemiLoc);
+  }
+
+  if (Callbacks)
+    Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
+}
+
+void Preprocessor::HandleCXXModuleOrImportDirective(Token &ModuleTok) {
+  assert(ModuleTok.isOneOf(tok::kw_import, tok::kw_module));
+  SourceLocation DirectiveStartLoc =
+      LastTokenWasExportKeyword
+          ? LastTokenWasExportKeyword->ExportTok.getLocation()
+          : ModuleTok.getLocation();
+  bool IsImport = ModuleTok.is(tok::kw_import);
+  SmallVector<Token, 32> Suffix;
+  LexAfterModuleImport(Suffix, IsImport);
+
+  if (!Suffix.empty() && Suffix.back().is(tok::semi))
+    CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName());
+
+  if (ModuleTok.is(tok::kw_module)) {
+    if (!IncludeMacroStack.empty()) {
+      Diag(DirectiveStartLoc, diag::err_module_decl_in_header)
+          << SourceRange(DirectiveStartLoc, Suffix.back().getLocation());
+    }
+
+    if (CurPPLexer->getConditionalStackDepth() != 0) {
+      Diag(DirectiveStartLoc, diag::err_pp_cond_span_module_decl)
+          << SourceRange(DirectiveStartLoc, Suffix.back().getLocation());
+    }
+  }
+
+  unsigned NumTokens = Suffix.size() + 1;
+  // Allocate a holding buffer for a sequence of tokens and introduce it into
+  // the token stream.
+  auto ToksCopy = std::make_unique<Token[]>(NumTokens);
+  ToksCopy[0] = ModuleTok;
+  std::copy(Suffix.begin(), Suffix.end(), &ToksCopy[1]);
+  EnterTokenStream(std::move(ToksCopy), NumTokens,
+                   /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+  return;
+}
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index f0b4593e0cc22e..eb694ce41566fb 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -905,6 +905,7 @@ void Preprocessor::Lex(Token &Result) {
     // This token is injected to represent the translation of '#include "a.h"'
     // into "import a.h;". Mimic the notional ';'.
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
     case tok::semi:
       TrackGMFState.handleSemi();
       StdCXXImportSeqState.handleSemi();
@@ -925,27 +926,20 @@ void Preprocessor::Lex(Token &Result) {
     case tok::period:
       ModuleDeclState.handlePeriod();
       break;
-    case tok::identifier:
-      // Check "import" and "module" when there is no open bracket. The two
-      // identifiers are not meaningful with open brackets.
+    case tok::kw_module:
+      TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
+      ModuleDeclState.handleModule();
+      break;
+    case tok::kw_import:
       if (StdCXXImportSeqState.atTopLevel()) {
-        if (Result.getIdentifierInfo()->isModulesImport()) {
-          TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
-          StdCXXImportSeqState.handleImport();
-          if (StdCXXImportSeqState.afterImportSeq()) {
-            ModuleImportLoc = Result.getLocation();
-            NamedModuleImportPath.clear();
-            IsAtImport = false;
-            ModuleImportExpectsIdentifier = true;
-            CurLexerCallback = CLK_LexAfterModuleImport;
-          }
-          break;
-        } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
-          TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
-          ModuleDeclState.handleModule();
-          break;
-        }
+        TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
+        StdCXXImportSeqState.handleImport();
       }
+      // ModuleImportLoc = Result.getLocation();
+      // NamedModuleImportPath.clear();
+      // IsAtImport = false;
+      break;
+    case tok::identifier:
       ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
       if (ModuleDeclState.isModuleCandidate())
         break;
@@ -964,6 +958,8 @@ void Preprocessor::Lex(Token &Result) {
   }
 
   LastTokenWasAt = Result.is(tok::at);
+  if (Result.isNot(tok::kw_export))
+    LastTokenWasExportKeyword.reset();
   --LexLevel;
 
   if ((LexLevel == 0 || PreprocessToken) &&
@@ -1111,7 +1107,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
       if (BracketDepth == 0)
         return;
     break;
-
+    case tok::eod:
     case tok::eof:
       return;
 
@@ -1121,213 +1117,82 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
   }
 }
 
-
-/// Lex a token following the 'import' contextual keyword.
-///
-///     pp-import: [C++20]
-///           import header-name pp-import-suffix[opt] ;
-///           import header-name-tokens pp-import-suffix[opt] ;
-/// [ObjC]    @ import module-name ;
-/// [Clang]   import module-name ;
-///
-///     header-name-tokens:
-///           string-literal
-///           < [any sequence of preprocessing-tokens other than >] >
-///
-///     module-name:
-///           module-name-qualifier[opt] identifier
-///
-///     module-name-qualifier
-///           module-name-qualifier[opt] identifier .
+/// P1857R3: Modules Dependency Discovery
 ///
-/// We respond to a pp-import by importing macros from the named module.
-bool Preprocessor::LexAfterModuleImport(Token &Result) {
-  // Figure out what kind of lexer we actually have.
-  recomputeCurLexerKind();
+/// At the start of phase 4 an import or module token is treated as starting a
+/// directive and is converted to its respective keyword iff:
+///   • after skipping horizontal whitespace it is
+///     • at the start of a logical line, or
+///     • preceded by an 'export' at the start of the logical line, and
+///   • it is followed by an identifier pp-token (before macro expansion), or
+///     • a <, ", or : (but not ::) pp-token for 'import', or
+///     • a ; for 'module'.
+/// Otherwise the token is treated as an identifier.
+bool Preprocessor::HandleModuleContextualKeyword(
+    Token &Result, bool TokAtPhysicalStartOfLine) {
+  if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
+    return false;
 
-  // Lex the next token. The header-name lexing rules are used at the start of
-  // a pp-import.
-  //
-  // For now, we only support header-name imports in C++20 mode.
-  // FIXME: Should we allow this in all language modes that support an import
-  // declaration as an extension?
-  if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
-    if (LexHeaderName(Result))
-      return true;
-
-    if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
-      std::string Name = ModuleDeclState.getPrimaryName().str();
-      Name += ":";
-      NamedModuleImportPath.push_back(
-          {getIdentifierInfo(Name), Result.getLocation()});
-      CurLexerCallback = CLK_LexAfterModuleImport;
-      return true;
-    }
-  } else {
-    Lex(Result);
+  if (Result.is(tok::kw_export)) {
+    LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine};
+    return false;
   }
 
-  // Allocate a holding buffer for a sequence of tokens and introduce it into
-  // the token stream.
-  auto EnterTokens = [this](ArrayRef<Token> Toks) {
-    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
-    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
-    EnterTokenStream(std::move(ToksCopy), Toks.size(),
-                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
-  };
-
-  bool ImportingHeader = Result.is(tok::header_name);
-  // Check for a header-name.
-  SmallVector<Token, 32> Suffix;
-  if (ImportingHeader) {
-    // Enter the header-name token into the token stream; a Lex action cannot
-    // both return a token and cache tokens (doing so would corrupt the token
-    // cache if the call to Lex comes from CachingLex / PeekAhead).
-    Suffix.push_back(Result);
-
-    // Consume the pp-import-suffix and expand any macros in it now. We'll add
-    // it back into the token stream later.
-    CollectPpImportSuffix(Suffix);
-    if (Suffix.back().isNot(tok::semi)) {
-      // This is not a pp-import after all.
-      EnterTokens(Suffix);
+  if (LastTokenWasExportKeyword) {
+    auto Export = *LastTokenWasExportKeyword;
+    LastTokenWasExportKeyword.reset();
+    // If the export keyword was not at the start of a line, it's not a
+    // directive-introducing token.
+    if (!Export.TokAtPhysicalStartOfLine)
       return false;
-    }
-
-    // C++2a [cpp.module]p1:
-    //   The ';' preprocessing-token terminating a pp-import shall not have
-    //   been produced by macro replacement.
-    SourceLocation SemiLoc = Suffix.back().getLocation();
-    if (SemiLoc.isMacroID())
-      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
-
-    // Reconstitute the import token.
-    Token ImportTok;
-    ImportTok.startToken();
-    ImportTok.setKind(tok::kw_import);
-    ImportTok.setLocation(ModuleImportLoc);
-    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
-    ImportTok.setLength(6);
-
-    auto Action = HandleHeaderIncludeOrImport(
-        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
-    switch (Action.Kind) {
-    case ImportAction::None:
-      break;
-
-    case ImportAction::ModuleBegin:
-      // Let the parser know we're textually entering the module.
-      Suffix.emplace_back();
-      Suffix.back().startToken();
-      Suffix.back().setKind(tok::annot_module_begin);
-      Suffix.back().setLocation(SemiLoc);
-      Suffix.back().setAnnotationEndLoc(SemiLoc);
-      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
-      [[fallthrough]];
-
-    case ImportAction::ModuleImport:
-    case ImportAction::HeaderUnitImport:
-    case ImportAction::SkippedModuleImport:
-      // We chose to import (or textually enter) the file. Convert the
-      // header-name token into a header unit annotation token.
-      Suffix[0].setKind(tok::annot_header_unit);
-      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
-      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
-      // FIXME: Call the moduleImport callback?
-      break;
-    case ImportAction::Failure:
-      assert(TheModuleLoader.HadFatalFailure &&
-             "This should be an early exit only to a fatal error");
-      Result.setKind(tok::eof);
-      CurLexer->cutOffLexing();
-      EnterTokens(Suffix);
-      return true;
-    }
-
-    EnterTokens(Suffix);
+    // [cpp.pre]/1.4:
+    //   export                // not a preprocessing directive
+    //   import foo;           // preprocessing directive
+    //                         //   (ill-formed at phase 7)
+    if (TokAtPhysicalStartOfLine)
+      return false;
+  } else if (!TokAtPhysicalStartOfLine)
     return false;
-  }
 
-  // The token sequence
-  //
-  //   import identifier (. identifier)*
-  //
-  // indicates a module import directive. We already saw the 'import'
-  // contextual keyword, so now we're looking for the identifiers.
-  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
-    // We expected to see an identifier here, and we did; continue handling
-    // identifiers.
-    NamedModuleImportPath.push_back(
-        std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
-    ModuleImportExpectsIdentifier = false;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
-  }
-
-  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
-  // see the next identifier. (We can also see a '[[' that begins an
-  // attribute-specifier-seq here under the Standard C++ Modules.)
-  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
-    ModuleImportExpectsIdentifier = true;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
-  }
-
-  // If we didn't recognize a module name at all, this is not a (valid) import.
-  if (NamedModuleImportPath.empty() || Result.is(tok::eof))
+  bool SavedParsingPreprocessorDirective = CurPPLexer->ParsingPreprocessorDirective;
+  CurPPLexer->ParsingPreprocessorDirective = true;
+  // Peek next token.
+  auto NextTok = peekNextPPToken().value_or(Token{});
+  CurPPLexer->ParsingPreprocessorDirective = SavedParsingPreprocessorDirective;
+  if (Result.getIdentifierInfo()->isModulesImport() &&
+      NextTok.isOneOf(tok::raw_identifier, tok::less, tok::string_literal,
+                      tok::colon)) {
+    Result.setKind(tok::kw_import);
+    NamedModuleImportPath.clear();
+    ModuleImportLoc = Result.getLocation();
+    IsAtImport = false;
     return true;
-
-  // Consume the pp-import-suffix and expand any macros in it now, if we're not
-  // at the semicolon already.
-  SourceLocation SemiLoc = Result.getLocation();
-  if (Result.isNot(tok::semi)) {
-    Suffix.push_back(Result);
-    CollectPpImportSuffix(Suffix);
-    if (Suffix.back().isNot(tok::semi)) {
-      // This is not an import after all.
-      EnterTokens(Suffix);
-      return false;
-    }
-    SemiLoc = Suffix.back().getLocation();
   }
-
-  // Under the standard C++ Modules, the dot is just part of the module name,
-  // and not a real hierarchy separator. Flatten such module names now.
-  //
-  // FIXME: Is this the right level to be performing this transformation?
-  std::string FlatModuleName;
-  if (getLangOpts().CPlusPlusModules) {
-    for (auto &Piece : NamedModuleImportPath) {
-      // If the FlatModuleName ends with colon, it implies it is a partition.
-      if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
-        FlatModuleName += ".";
-      FlatModuleName += Piece.first->getName();
-    }
-    SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
+  if (Result.getIdentifierInfo()->isModulesDeclaration() &&
+      NextTok.isOneOf(tok::raw_identifier, tok::colon, tok::semi)) {
+    Result.setKind(tok::kw_module);
     NamedModuleImportPath.clear();
-    NamedModuleImportPath.push_back(
-        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
-  }
-
-  Module *Imported = nullptr;
-  // We don't/shouldn't load the standard c++20 modules when preprocessing.
-  if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
-    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
-                                          NamedModuleImportPath,
-                                          Module::Hidden,
-                                          /*IsInclusionDirective=*/false);
-    if (Imported)
-      makeModuleVisible(Imported, SemiLoc);
+    return true;
   }
 
-  if (Callbacks)
-    Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported);
+  // Ok, it's an identifier.
+  return false;
+}
 
-  if (!Suffix.empty()) {
-    EnterTokens(Suffix);
-    return false;
-  }
-  return true;
+bool Preprocessor::LexAfterModuleImport(Token &Result) {
+  recomputeCurLexerKind();
+  // Allocate a holding buffer for a sequence of tokens and introduce it into
+  // the token stream.
+  auto EnterTokens = [this](ArrayRef<Token> Toks) {
+    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+    EnterTokenStream(std::move(ToksCopy), Toks.size(),
+                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+  };
+  SmallVector<Token, 32> Suffix;
+  LexAfterModuleImport(Suffix, true);
+  EnterTokens(Suffix);
+  return false;
 }
 
 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 0eca09ef93da92..8fc39a5c517b81 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -700,7 +700,8 @@ bool TokenLexer::Lex(Token &Tok) {
   HasLeadingSpace = false;
 
   // Handle recursive expansion!
-  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
+  if (!Tok.isAnnotation() && !Tok.isModuleContextualKeyword() &&
+      Tok.getIdentifierInfo() != nullptr) {
     // Change the kind of this identifier to the appropriate token kind, e.g.
     // turning "for" into a keyword.
     IdentifierInfo *II = Tok.getIdentifierInfo();
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 04c2f1d380bc48..6d3312684c1f90 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -514,8 +514,6 @@ void Parser::Initialize() {
   Ident_abstract = nullptr;
   Ident_override = nullptr;
   Ident_GNU_final = nullptr;
-  Ident_import = nullptr;
-  Ident_module = nullptr;
 
   Ident_super = &PP.getIdentifierTable().get("super");
 
@@ -571,11 +569,6 @@ void Parser::Initialize() {
     PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block);
   }
 
-  if (getLangOpts().CPlusPlusModules) {
-    Ident_import = PP.getIdentifierInfo("import");
-    Ident_module = PP.getIdentifierInfo("module");
-  }
-
   Actions.Initialize();
 
   // Prime the lexer look-ahead.
@@ -639,30 +632,12 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     switch (NextToken().getKind()) {
     case tok::kw_module:
       goto module_decl;
-
-    // Note: no need to handle kw_import here. We only form kw_import under
-    // the Standard C++ Modules, and in that case 'export import' is parsed as
-    // an export-declaration containing an import-declaration.
-
-    // Recognize context-sensitive C++20 'export module' and 'export import'
-    // declarations.
-    case tok::identifier: {
-      IdentifierInfo *II = NextToken().getIdentifierInfo();
-      if ((II == Ident_module || II == Ident_import) &&
-          GetLookAheadToken(2).isNot(tok::coloncolon)) {
-        if (II == Ident_module)
-          goto module_decl;
-        else
-          goto import_decl;
-      }
-      break;
-    }
-
+    case tok::kw_import:
+      goto import_decl;
     default:
       break;
     }
     break;
-
   case tok::kw_module:
   module_decl:
     Result = ParseModuleDecl(ImportState);
@@ -725,22 +700,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     Actions.ActOnEndOfTranslationUnit();
     //else don't tell Sema that we ended parsing: more input might come.
     return true;
-
-  case tok::identifier:
-    // C++2a [basic.link]p3:
-    //   A token sequence beginning with 'export[opt] module' or
-    //   'export[opt] import' and not immediately followed by '::'
-    //   is never interpreted as the declaration of a top-level-declaration.
-    if ((Tok.getIdentifierInfo() == Ident_module ||
-         Tok.getIdentifierInfo() == Ident_import) &&
-        NextToken().isNot(tok::coloncolon)) {
-      if (Tok.getIdentifierInfo() == Ident_module)
-        goto module_decl;
-      else
-        goto import_decl;
-    }
-    break;
-
   default:
     break;
   }
@@ -956,14 +915,6 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs,
     };
     Actions.CodeCompletion().CodeCompleteOrdinaryName(getCurScope(), PCC);
     return nullptr;
-  case tok::kw_import: {
-    Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module;
-    if (getLangOpts().CPlusPlusModules) {
-      llvm_unreachable("not expecting a c++20 import here");
-      ProhibitAttributes(Attrs);
-    }
-    SingleDecl = ParseModuleImport(SourceLocation(), IS);
-  } break;
   case tok::kw_export:
     if (getLangOpts().CPlusPlusModules || getLangOpts().HLSL) {
       ProhibitAttributes(Attrs);
@@ -1048,9 +999,10 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs,
   case tok::kw___if_not_exists:
     ParseMicrosoftIfExistsExternalDeclaration();
     return nullptr;
-
+  case tok::kw_import:
   case tok::kw_module:
-    Diag(Tok, diag::err_unexpected_module_decl);
+    Diag(Tok, diag::err_unexpected_module_or_import_decl)
+        << Tok.is(tok::kw_import);
     SkipUntil(tok::semi);
     return nullptr;
 
@@ -2462,10 +2414,7 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
                                  ? Sema::ModuleDeclKind::Interface
                                  : Sema::ModuleDeclKind::Implementation;
 
-  assert(
-      (Tok.is(tok::kw_module) ||
-       (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) &&
-      "not a module declaration");
+  assert(Tok.is(tok::kw_module) && "not a module declaration");
   SourceLocation ModuleLoc = ConsumeToken();
 
   // Attributes appear after the module name, not before.
@@ -2557,7 +2506,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
   SourceLocation ExportLoc;
   TryConsumeToken(tok::kw_export, ExportLoc);
 
-  assert((AtLoc.isInvalid() ? Tok.isOneOf(tok::kw_import, tok::identifier)
+  assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import)
                             : Tok.isObjCAtKeyword(tok::objc_import)) &&
          "Improper start to module import");
   bool IsObjCAtImport = Tok.isObjCAtKeyword(tok::objc_import);
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118c..49c005b245d207 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -986,11 +986,11 @@ ort \
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives));
   EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;"
-               "exp\\\nort import:l[[rename]];"
-               "import<<=3;import a b d e d e f e;"
-               "import foo[[no_unique_address]];import foo();"
-               "import f(:sefse);import f(->a=3);"
-               "<TokBeforeEOF>\n",
+               "\nexp\\\nort import:l[[rename]];"
+               "\nimport<<=3;\nimport a b d e d e f e;"
+               "\nimport foo[[no_unique_address]];\nimport foo();"
+               "\nimport f(:sefse);\nimport f(->a=3);"
+               "\n<TokBeforeEOF>\n",
                Out.data());
   ASSERT_EQ(Directives.size(), 11u);
   EXPECT_EQ(Directives[0].Kind, pp_include);
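For reviewers checking HandleModuleContextualKeyword against the look-ahead
rules documented in Preprocessor.cpp above, a sketch of how individual lines
would be classified (names adapted from the [cpp.pre] example; purely
illustrative):

    // Converted to kw_module / kw_import (directive-introducing):
    export module leftpad;   // 'module' preceded by 'export' at line start, then identifier
    import <string>;         // 'import' at line start, followed by '<'
    export import "squee";   // followed by '"'
    import rightpad;         // followed by an identifier
    import :part;            // followed by ':' (but not '::')
    module;                  // 'module' followed by ';'

    // Left as ordinary identifiers (no directive is introduced):
    int import = 0;          // 'import' is not at the start of the logical line
    import::width x;         // 'import' followed by '::'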


