[clang] Reapply "[Clang] Implement P3034R1 Module Declarations Shouldn’t be Macros" (PR #102135)

Thu Aug 15 08:19:51 PDT 2024

https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/102135

>From efa62c3c8572f9c94cf0e2e9ac5a1fb11746ddc7 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Tue, 23 Jul 2024 19:46:49 +0800
Subject: [PATCH 1/6] =?UTF-8?q?[Clang]=20Implement=20P3034R1=20Module=20De?=
 =?UTF-8?q?clarations=20Shouldn=E2=80=99t=20be=20Macros?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/docs/ReleaseNotes.rst                   |   2 +
 .../include/clang/Basic/DiagnosticLexKinds.td |   5 +
 clang/include/clang/Basic/IdentifierTable.h   |  24 +-
 clang/include/clang/Basic/TokenKinds.def      |   3 +
 clang/include/clang/Lex/Preprocessor.h        |  83 +++-
 clang/include/clang/Lex/Token.h               |   3 +
 clang/include/clang/Parse/Parser.h            |   2 +-
 clang/lib/Basic/IdentifierTable.cpp           |   3 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  12 +-
 clang/lib/Lex/PPLexerChange.cpp               |   9 +-
 clang/lib/Lex/Preprocessor.cpp                | 444 ++++++++++++------
 clang/lib/Lex/TokenConcatenation.cpp          |  10 +
 clang/lib/Parse/ParseDecl.cpp                 |   8 +-
 clang/lib/Parse/Parser.cpp                    |  93 ++--
 clang/test/CXX/cpp/cpp.module/p2.cppm         |  88 ++++
 .../basic/basic.link/module-declaration.cpp   |  61 +--
 .../dcl.module/dcl.module.import/p1.cppm      |  39 +-
 clang/test/SemaCXX/modules.cppm               |  89 ++--
 clang/www/cxx_status.html                     |   2 +-
 19 files changed, 717 insertions(+), 263 deletions(-)
 create mode 100644 clang/test/CXX/cpp/cpp.module/p2.cppm

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c7bd099420abf..a5953a3641fff7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -89,6 +89,8 @@ C++2c Feature Support
 - Add ``__builtin_is_virtual_base_of`` intrinsic, which supports
   `P2985R0 A type trait for detecting virtual base classes <https://wg21.link/p2985r0>`_
 
+- Implemented `P3034R1 Module Declarations Shouldn’t be Macros <https://wg21.link/P3034R1>`_.
+
 Resolutions to C++ Defect Reports
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 12d7b8c0205ee9..08ece01009387d 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -952,6 +952,11 @@ def warn_module_conflict : Warning<
   InGroup<ModuleConflict>;
 
 // C++20 modules
+def err_module_decl_cannot_be_macros : Error<
+  "the module name in a module%select{| partition}0 declaration cannot contain "
+  "an object-like macro %1">;
+def err_unxepected_paren_in_module_decl : Error<
+  "unexpected '(' after the module name in a module%select{| partition}0 declaration">;
 def err_header_import_semi_in_macro : Error<
   "semicolon terminating header import declaration cannot be produced "
   "by a macro">;
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index ae9ebd9f59154e..f40f74d0355ade 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -180,6 +180,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsModulesImport : 1;
 
+  // True if this is the 'module' contextual keyword.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned IsModulesDecl : 1;
+
   // True if this is a mangled OpenMP variant name.
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsMangledOpenMPVariantName : 1;
@@ -196,7 +200,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsFinal : 1;
 
-  // 22 bits left in a 64-bit word.
+  // 21 bits left in a 64-bit word.
 
   // Managed by the language front-end.
   void *FETokenInfo = nullptr;
@@ -212,8 +216,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
         IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
         IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
         RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
-        IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
-        IsRestrictExpansion(false), IsFinal(false) {}
+        IsModulesDecl(false), IsMangledOpenMPVariantName(false),
+        IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
 
 public:
   IdentifierInfo(const IdentifierInfo &) = delete;
@@ -520,6 +524,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
       RecomputeNeedsHandleIdentifier();
   }
 
+  /// Determine whether this is the contextual keyword \c module.
+  bool isModulesDeclaration() const { return IsModulesDecl; }
+
+  /// Set whether this identifier is the contextual keyword \c module.
+  void setModulesDeclaration(bool I) {
+    IsModulesDecl = I;
+    if (I)
+      NeedsHandleIdentifier = true;
+    else
+      RecomputeNeedsHandleIdentifier();
+  }
+
   /// Determine whether this is the mangled name of an OpenMP variant.
   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
 
@@ -740,6 +756,8 @@ class IdentifierTable {
     // If this is the 'import' contextual keyword, mark it as such.
     if (Name == "import")
       II->setModulesImport(true);
+    else if (Name == "module")
+      II->setModulesDeclaration(true);
 
     return *II;
   }
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 7e638dc1ddcdba..bea46f617e690d 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -1006,6 +1006,9 @@ ANNOTATION(module_include)
 ANNOTATION(module_begin)
 ANNOTATION(module_end)
 
+// Annotations for C++, Clang and Objective-C named modules.
+ANNOTATION(module_name)
+
 // Annotation for a header_name token that has been looked up and transformed
 // into the name of a header unit.
 ANNOTATION(header_unit)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 623f868ca1e648..c35b768ffea1bc 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -615,10 +615,6 @@ class Preprocessor {
 
   ModuleDeclSeq ModuleDeclState;
 
-  /// Whether the module import expects an identifier next. Otherwise,
-  /// it expects a '.' or ';'.
-  bool ModuleImportExpectsIdentifier = false;
-
   /// The identifier and source location of the currently-active
   /// \#pragma clang arc_cf_code_audited begin.
   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
@@ -1763,11 +1759,14 @@ class Preprocessor {
   /// Lex a token, forming a header-name token if possible.
   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
 
+  /// Lex a module name or a partition name.
+  bool LexModuleName(Token &Result, bool IsImport);
+
   /// Lex the parameters for an #embed directive, returns nullopt on error.
   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
                                                              bool ForHasEmbed);
-
   bool LexAfterModuleImport(Token &Result);
+  bool LexAfterModuleDecl(Token &Result);
   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
 
   void makeModuleVisible(Module *M, SourceLocation Loc);
@@ -3059,6 +3058,9 @@ class Preprocessor {
   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
     return P.LexAfterModuleImport(Result);
   }
+  static bool CLK_LexAfterModuleDecl(Preprocessor &P, Token &Result) {
+    return P.LexAfterModuleDecl(Result);
+  }
 };
 
 /// Abstract base class that describes a handler that will receive
@@ -3091,6 +3093,77 @@ struct EmbedAnnotationData {
 /// Registry of pragma handlers added by plugins
 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
 
+/// Represents a module or partition name token sequence.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+///
+/// This class can only be created by the preprocessor, which guarantees that
+/// the two source arrays are contiguous in memory and contain only 3 kinds of
+/// tokens (identifier, '.' and ':'). It is only available when the
+/// preprocessor returns an annot_module_name token.
+///
+/// For example:
+///
+/// export module m.n:c.d
+///
+/// The module name array has 3 tokens ['m', '.', 'n'].
+/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
+///
+/// When importing a partition in a named module fragment (e.g.
+/// import :part1;), the module name array will be empty, and the partition
+/// name array has 2 tokens.
+///
+/// When we meet a private-module-fragment (e.g. module :private;), the
+/// preprocessor will not return an annot_module_name token, but will return 2
+/// separate tokens [':', 'kw_private'].
+
+class ModuleNameInfo {
+  friend class Preprocessor;
+  ArrayRef<Token> ModuleName;
+  ArrayRef<Token> PartitionName;
+
+  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
+
+public:
+  /// Return the contiguous token array.
+  ArrayRef<Token> getTokens() const {
+    if (ModuleName.empty())
+      return PartitionName;
+    if (PartitionName.empty())
+      return ModuleName;
+    return ArrayRef(ModuleName.begin(), PartitionName.end());
+  }
+  bool hasModuleName() const { return !ModuleName.empty(); }
+  bool hasPartitionName() const { return !PartitionName.empty(); }
+  ArrayRef<Token> getModuleName() const { return ModuleName; }
+  ArrayRef<Token> getPartitionName() const { return PartitionName; }
+  Token getColonToken() const {
+    assert(hasPartitionName() && "Do not have a partition name");
+    return getPartitionName().front();
+  }
+
+  /// Under the standard C++ Modules, the dot is just part of the module name,
+  /// and not a real hierarchy separator. Flatten such module names now.
+  std::string getFlatName() const;
+
+  /// Build a module id path from the contiguous token array, including both
+  /// the module name and the partition name.
+  void getModuleIdPath(
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
+
+  /// Build a module id path from \p ModuleName.
+  static void getModuleIdPath(
+      ArrayRef<Token> ModuleName,
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 4f29fb7d114159..2be3ad39529f05 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -235,6 +235,9 @@ class Token {
     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     return PtrData;
   }
+  template <class T> T getAnnotationValueAs() const {
+    return static_cast<T>(getAnnotationValue());
+  }
   void setAnnotationValue(void *val) {
     assert(isAnnotation() && "Used AnnotVal on non-annotation token");
     PtrData = val;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index ba7d6866ebacd8..31c48f8805f4dc 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3878,7 +3878,7 @@ class Parser : public CodeCompletionHandler {
   }
 
   bool ParseModuleName(
-      SourceLocation UseLoc,
+      SourceLocation UseLoc, ArrayRef<Token> ModuleName,
       SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
       bool IsImport);
 
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 4f7ccaf4021d63..97d830214f8900 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -322,8 +322,9 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
   if (LangOpts.IEEE128)
     AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
 
-  // Add the 'import' contextual keyword.
+  // Add the 'import' and 'module' contextual keywords.
   get("import").setModulesImport(true);
+  get("module").setModulesDeclaration(true);
 }
 
 /// Checks if the specified token kind represents a keyword in the
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 135dca0e6a1775..e21f2b945b86ad 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -758,9 +758,10 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
   // These tokens are not expanded to anything and don't need whitespace before
   // them.
   if (Tok.is(tok::eof) ||
-      (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
-       !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
-       !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed)))
+      (Tok.isAnnotation() && Tok.isNot(tok::annot_header_unit) &&
+       Tok.isNot(tok::annot_module_begin) && Tok.isNot(tok::annot_module_end) &&
+       Tok.isNot(tok::annot_module_name) &&
+       Tok.isNot(tok::annot_repl_input_end) && Tok.isNot(tok::annot_embed)))
     return;
 
   // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
@@ -951,6 +952,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
       PP.Lex(Tok);
       IsStartOfLine = true;
       continue;
+    } else if (Tok.is(tok::annot_module_name)) {
+      auto *Info = static_cast<ModuleNameInfo *>(Tok.getAnnotationValue());
+      *Callbacks->OS << Info->getFlatName();
+      PP.Lex(Tok);
+      continue;
     } else if (Tok.is(tok::annot_header_unit)) {
       // This is a header-name that has been (effectively) converted into a
       // module-name.
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index 8221db46e06acc..c3a903917e9ce1 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -122,7 +122,8 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
   CurPPLexer = TheLexer;
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
+  if (CurLexerCallback != CLK_LexAfterModuleImport &&
+      CurLexerCallback != CLK_LexAfterModuleDecl)
     CurLexerCallback = TheLexer->isDependencyDirectivesLexer()
                            ? CLK_DependencyDirectivesLexer
                            : CLK_Lexer;
@@ -161,8 +162,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
-    CurLexerCallback = CLK_TokenLexer;
+  CurLexerCallback = CLK_TokenLexer;
 }
 
 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
@@ -216,7 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerCallback != CLK_LexAfterModuleImport)
+  if (CurLexerCallback != CLK_LexAfterModuleImport &&
+      CurLexerCallback != CLK_LexAfterModuleDecl)
     CurLexerCallback = CLK_TokenLexer;
 }
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index f0b4593e0cc22e..d0c4ab8fd5b669 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -860,9 +860,15 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
     ModuleImportLoc = Identifier.getLocation();
     NamedModuleImportPath.clear();
     IsAtImport = true;
-    ModuleImportExpectsIdentifier = true;
     CurLexerCallback = CLK_LexAfterModuleImport;
   }
+
+  if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) &&
+      !InMacroArgs && !DisableMacroExpansion &&
+      (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) &&
+      CurLexerCallback != CLK_CachingLexer) {
+    CurLexerCallback = CLK_LexAfterModuleDecl;
+  }
   return true;
 }
 
@@ -905,6 +911,7 @@ void Preprocessor::Lex(Token &Result) {
     // This token is injected to represent the translation of '#include "a.h"'
     // into "import a.h;". Mimic the notional ';'.
     case tok::annot_module_include:
+    case tok::annot_repl_input_end:
     case tok::semi:
       TrackGMFState.handleSemi();
       StdCXXImportSeqState.handleSemi();
@@ -919,12 +926,30 @@ void Preprocessor::Lex(Token &Result) {
       StdCXXImportSeqState.handleExport();
       ModuleDeclState.handleExport();
       break;
-    case tok::colon:
-      ModuleDeclState.handleColon();
-      break;
-    case tok::period:
-      ModuleDeclState.handlePeriod();
+    case tok::annot_module_name: {
+      auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue());
+      for (const auto &Tok : Info->getTokens()) {
+        switch (Tok.getKind()) {
+        case tok::identifier:
+          ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo());
+          break;
+        case tok::period:
+          ModuleDeclState.handlePeriod();
+          break;
+        case tok::colon:
+          ModuleDeclState.handleColon();
+          break;
+        default:
+          llvm_unreachable("Unexpected token in module name");
+        }
+      }
+      if (ModuleDeclState.isModuleCandidate())
+        break;
+      TrackGMFState.handleMisc();
+      StdCXXImportSeqState.handleMisc();
+      ModuleDeclState.handleMisc();
       break;
+    }
     case tok::identifier:
       // Check "import" and "module" when there is no open bracket. The two
       // identifiers are not meaningful with open brackets.
@@ -936,17 +961,17 @@ void Preprocessor::Lex(Token &Result) {
             ModuleImportLoc = Result.getLocation();
             NamedModuleImportPath.clear();
             IsAtImport = false;
-            ModuleImportExpectsIdentifier = true;
             CurLexerCallback = CLK_LexAfterModuleImport;
           }
           break;
-        } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
+        }
+        if (Result.getIdentifierInfo()->isModulesDeclaration()) {
           TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
           ModuleDeclState.handleModule();
+          CurLexerCallback = CLK_LexAfterModuleDecl;
           break;
         }
       }
-      ModuleDeclState.handleIdentifier(Result.getIdentifierInfo());
       if (ModuleDeclState.isModuleCandidate())
         break;
       [[fallthrough]];
@@ -1121,6 +1146,151 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
   }
 }
 
+ModuleNameInfo::ModuleNameInfo(ArrayRef<Token> AnnotToks,
+                               std::optional<unsigned> ColonIndex) {
+  assert(!AnnotToks.empty() && "Named module token cannot be empty.");
+  if (!ColonIndex.has_value())
+    ColonIndex = AnnotToks.size();
+  ModuleName = ArrayRef(AnnotToks.begin(), AnnotToks.begin() + *ColonIndex);
+  PartitionName = ArrayRef(AnnotToks.begin() + *ColonIndex, AnnotToks.end());
+  assert(ModuleName.end() == PartitionName.begin());
+}
+
+std::string ModuleNameInfo::getFlatName() const {
+  std::string FlatModuleName;
+  for (auto &Tok : getTokens()) {
+    switch (Tok.getKind()) {
+    case tok::identifier:
+      FlatModuleName += Tok.getIdentifierInfo()->getName();
+      break;
+    case tok::period:
+      FlatModuleName += '.';
+      break;
+    case tok::colon:
+      FlatModuleName += ':';
+      break;
+    default:
+      llvm_unreachable("Unexpected token in module name");
+    }
+  }
+  return FlatModuleName;
+}
+
+void ModuleNameInfo::getModuleIdPath(
+    SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const {
+  return getModuleIdPath(getTokens(), Path);
+}
+
+void ModuleNameInfo::getModuleIdPath(
+    ArrayRef<Token> ModuleName,
+    SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) {
+  for (const auto &Tok : ModuleName) {
+    if (Tok.is(tok::identifier))
+      Path.push_back(
+          std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
+  }
+}
+
+/// Lex a module name or a partition name.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+bool Preprocessor::LexModuleName(Token &Result, bool IsImport) {
+  bool ExpectsIdentifier = true, IsLexingPartition = false;
+  SmallVector<Token, 8> ModuleName;
+  std::optional<unsigned> ColonTokIndex;
+  auto LexNextToken = [&](Token &Tok) {
+    if (IsImport)
+      Lex(Tok);
+    else
+      LexUnexpandedToken(Tok);
+  };
+
+  while (true) {
+    LexNextToken(Result);
+    if (ExpectsIdentifier && Result.is(tok::identifier)) {
+      auto *MI = getMacroInfo(Result.getIdentifierInfo());
+      if (getLangOpts().CPlusPlusModules && !IsImport && MI &&
+          MI->isObjectLike()) {
+        Diag(Result, diag::err_module_decl_cannot_be_macros)
+            << Result.getLocation() << IsLexingPartition
+            << Result.getIdentifierInfo();
+      }
+      ModuleName.push_back(Result);
+      ExpectsIdentifier = false;
+      continue;
+    }
+
+    if (!ExpectsIdentifier && Result.is(tok::period)) {
+      ModuleName.push_back(Result);
+      ExpectsIdentifier = true;
+      continue;
+    }
+
+    // Module partition only allowed in C++20 Modules.
+    if (getLangOpts().CPlusPlusModules && Result.is(tok::colon)) {
+      // Handle the form like: import :P;
+      // If the token after ':' is not an identifier, this is an invalid module
+      // name.
+      if (ModuleName.empty()) {
+        Token Tmp;
+        LexNextToken(Tmp);
+        EnterToken(Tmp, /*IsReiject=*/false);
+        // A private-module-fragment:
+        // export module :private;
+        if (!IsImport && Tmp.is(tok::kw_private))
+          return true;
+        // import :N;
+        if (IsImport && Tmp.isNot(tok::identifier))
+          return false;
+      } else if (!ExpectsIdentifier) {
+        ExpectsIdentifier = true;
+      }
+      IsLexingPartition = true;
+      ColonTokIndex = ModuleName.size();
+      ModuleName.push_back(Result);
+      continue;
+    }
+
+    // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a (
+    // preprocessing token [...]
+    //
+    // We only emit diagnostic in the preprocessor, and in the parser we skip
+    // invalid tokens and recover from errors.
+    if (getLangOpts().CPlusPlusModules && !ExpectsIdentifier &&
+        Result.is(tok::l_paren))
+      Diag(Result, diag::err_unxepected_paren_in_module_decl)
+          << IsLexingPartition;
+    break;
+  }
+
+  // Put the last token back to stream, it's not a valid part of module name.
+  // We lexed it unexpanded but it might be a valid macro expansion
+  Result.clearFlag(Token::DisableExpand);
+  auto ToksCopy = std::make_unique<Token[]>(1);
+  *ToksCopy.get() = Result;
+  EnterTokenStream(std::move(ToksCopy), 1,
+                   /*DisableMacroExpansion=*/false,
+                   /*IsReinject=*/false);
+
+  if (ModuleName.empty())
+    return false;
+  Result.startToken();
+  Result.setKind(tok::annot_module_name);
+  Result.setLocation(ModuleName.front().getLocation());
+  Result.setAnnotationEndLoc(ModuleName.back().getLocation());
+  auto AnnotToks = ArrayRef(ModuleName).copy(getPreprocessorAllocator());
+  ModuleNameInfo *Info =
+      new (getPreprocessorAllocator()) ModuleNameInfo(AnnotToks, ColonTokIndex);
+  Result.setAnnotationValue(static_cast<void *>(Info));
+  return true;
+}
 
 /// Lex a token following the 'import' contextual keyword.
 ///
@@ -1145,6 +1315,17 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // Figure out what kind of lexer we actually have.
   recomputeCurLexerKind();
 
+  // Allocate a holding buffer for a sequence of tokens and introduce it into
+  // the token stream.
+  auto EnterTokens = [this](ArrayRef<Token> Toks) {
+    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+    EnterTokenStream(std::move(ToksCopy), Toks.size(),
+                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+  };
+
+  SmallVector<Token, 32> Suffix;
+
   // Lex the next token. The header-name lexing rules are used at the start of
   // a pp-import.
   //
@@ -1155,122 +1336,108 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     if (LexHeaderName(Result))
       return true;
 
-    if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) {
-      std::string Name = ModuleDeclState.getPrimaryName().str();
-      Name += ":";
-      NamedModuleImportPath.push_back(
-          {getIdentifierInfo(Name), Result.getLocation()});
-      CurLexerCallback = CLK_LexAfterModuleImport;
-      return true;
-    }
-  } else {
-    Lex(Result);
-  }
+    // Check for a header-name.
+    if (Result.is(tok::header_name)) {
+      // Enter the header-name token into the token stream; a Lex action cannot
+      // both return a token and cache tokens (doing so would corrupt the token
+      // cache if the call to Lex comes from CachingLex / PeekAhead).
+      Suffix.push_back(Result);
+
+      // Consume the pp-import-suffix and expand any macros in it now. We'll add
+      // it back into the token stream later.
+      CollectPpImportSuffix(Suffix);
+      if (Suffix.back().isNot(tok::semi)) {
+        // This is not a pp-import after all.
+        EnterTokens(Suffix);
+        return false;
+      }
 
-  // Allocate a holding buffer for a sequence of tokens and introduce it into
-  // the token stream.
-  auto EnterTokens = [this](ArrayRef<Token> Toks) {
-    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
-    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
-    EnterTokenStream(std::move(ToksCopy), Toks.size(),
-                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
-  };
+      // C++2a [cpp.module]p1:
+      //   The ';' preprocessing-token terminating a pp-import shall not have
+      //   been produced by macro replacement.
+      SourceLocation SemiLoc = Suffix.back().getLocation();
+      if (SemiLoc.isMacroID())
+        Diag(SemiLoc, diag::err_header_import_semi_in_macro);
+
+      // Reconstitute the import token.
+      Token ImportTok;
+      ImportTok.startToken();
+      ImportTok.setKind(tok::kw_import);
+      ImportTok.setLocation(ModuleImportLoc);
+      ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
+      ImportTok.setLength(6);
+
+      auto Action = HandleHeaderIncludeOrImport(
+          /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
+      switch (Action.Kind) {
+      case ImportAction::None:
+        break;
 
-  bool ImportingHeader = Result.is(tok::header_name);
-  // Check for a header-name.
-  SmallVector<Token, 32> Suffix;
-  if (ImportingHeader) {
-    // Enter the header-name token into the token stream; a Lex action cannot
-    // both return a token and cache tokens (doing so would corrupt the token
-    // cache if the call to Lex comes from CachingLex / PeekAhead).
-    Suffix.push_back(Result);
+      case ImportAction::ModuleBegin:
+        // Let the parser know we're textually entering the module.
+        Suffix.emplace_back();
+        Suffix.back().startToken();
+        Suffix.back().setKind(tok::annot_module_begin);
+        Suffix.back().setLocation(SemiLoc);
+        Suffix.back().setAnnotationEndLoc(SemiLoc);
+        Suffix.back().setAnnotationValue(Action.ModuleForHeader);
+        [[fallthrough]];
+
+      case ImportAction::ModuleImport:
+      case ImportAction::HeaderUnitImport:
+      case ImportAction::SkippedModuleImport:
+        // We chose to import (or textually enter) the file. Convert the
+        // header-name token into a header unit annotation token.
+        Suffix[0].setKind(tok::annot_header_unit);
+        Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
+        Suffix[0].setAnnotationValue(Action.ModuleForHeader);
+        // FIXME: Call the moduleImport callback?
+        break;
+      case ImportAction::Failure:
+        assert(TheModuleLoader.HadFatalFailure &&
+               "This should be an early exit only to a fatal error");
+        Result.setKind(tok::eof);
+        CurLexer->cutOffLexing();
+        EnterTokens(Suffix);
+        return true;
+      }
 
-    // Consume the pp-import-suffix and expand any macros in it now. We'll add
-    // it back into the token stream later.
-    CollectPpImportSuffix(Suffix);
-    if (Suffix.back().isNot(tok::semi)) {
-      // This is not a pp-import after all.
       EnterTokens(Suffix);
       return false;
     }
+  } else {
+    Lex(Result);
+  }
 
-    // C++2a [cpp.module]p1:
-    //   The ';' preprocessing-token terminating a pp-import shall not have
-    //   been produced by macro replacement.
-    SourceLocation SemiLoc = Suffix.back().getLocation();
-    if (SemiLoc.isMacroID())
-      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
-
-    // Reconstitute the import token.
-    Token ImportTok;
-    ImportTok.startToken();
-    ImportTok.setKind(tok::kw_import);
-    ImportTok.setLocation(ModuleImportLoc);
-    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
-    ImportTok.setLength(6);
-
-    auto Action = HandleHeaderIncludeOrImport(
-        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
-    switch (Action.Kind) {
-    case ImportAction::None:
-      break;
-
-    case ImportAction::ModuleBegin:
-      // Let the parser know we're textually entering the module.
-      Suffix.emplace_back();
-      Suffix.back().startToken();
-      Suffix.back().setKind(tok::annot_module_begin);
-      Suffix.back().setLocation(SemiLoc);
-      Suffix.back().setAnnotationEndLoc(SemiLoc);
-      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
-      [[fallthrough]];
-
-    case ImportAction::ModuleImport:
-    case ImportAction::HeaderUnitImport:
-    case ImportAction::SkippedModuleImport:
-      // We chose to import (or textually enter) the file. Convert the
-      // header-name token into a header unit annotation token.
-      Suffix[0].setKind(tok::annot_header_unit);
-      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
-      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
-      // FIXME: Call the moduleImport callback?
-      break;
-    case ImportAction::Failure:
-      assert(TheModuleLoader.HadFatalFailure &&
-             "This should be an early exit only to a fatal error");
-      Result.setKind(tok::eof);
-      CurLexer->cutOffLexing();
-      EnterTokens(Suffix);
+  if (Result.isOneOf(tok::identifier, tok::colon)) {
+    EnterToken(Result, /*IsReinject=*/false);
+    if (!LexModuleName(Result, /*IsImport=*/true))
       return true;
+    auto *Info = Result.getAnnotationValueAs<ModuleNameInfo *>();
+    if (getLangOpts().CPlusPlusModules) {
+      // Under the standard C++ Modules, the dot is just part of the module
+      // name, and not a real hierarchy separator. Flatten such module names
+      // now.
+      //
+      // FIXME: Is this the right level to be performing this transformation?
+      std::string FlatModuleName;
+      if (Info->getTokens().front().is(tok::colon)) {
+        // Import a module partition allowed in C++20 Modules.
+        // We can import a partition in named module TU.
+        if (NamedModuleImportPath.empty() && ModuleDeclState.isNamedModule())
+          FlatModuleName = llvm::Twine(ModuleDeclState.getPrimaryName())
+                               .concat(Info->getFlatName())
+                               .str();
+        else
+          return true;
+      } else {
+        FlatModuleName = Info->getFlatName();
+      }
+      NamedModuleImportPath.emplace_back(getIdentifierInfo(FlatModuleName),
+                                         Result.getLocation());
+    } else {
+      Info->getModuleIdPath(NamedModuleImportPath);
     }
-
-    EnterTokens(Suffix);
-    return false;
-  }
-
-  // The token sequence
-  //
-  //   import identifier (. identifier)*
-  //
-  // indicates a module import directive. We already saw the 'import'
-  // contextual keyword, so now we're looking for the identifiers.
-  if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
-    // We expected to see an identifier here, and we did; continue handling
-    // identifiers.
-    NamedModuleImportPath.push_back(
-        std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
-    ModuleImportExpectsIdentifier = false;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
-  }
-
-  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
-  // see the next identifier. (We can also see a '[[' that begins an
-  // attribute-specifier-seq here under the Standard C++ Modules.)
-  if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
-    ModuleImportExpectsIdentifier = true;
-    CurLexerCallback = CLK_LexAfterModuleImport;
-    return true;
   }
 
   // If we didn't recognize a module name at all, this is not a (valid) import.
@@ -1291,24 +1458,6 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     SemiLoc = Suffix.back().getLocation();
   }
 
-  // Under the standard C++ Modules, the dot is just part of the module name,
-  // and not a real hierarchy separator. Flatten such module names now.
-  //
-  // FIXME: Is this the right level to be performing this transformation?
-  std::string FlatModuleName;
-  if (getLangOpts().CPlusPlusModules) {
-    for (auto &Piece : NamedModuleImportPath) {
-      // If the FlatModuleName ends with colon, it implies it is a partition.
-      if (!FlatModuleName.empty() && FlatModuleName.back() != ':')
-        FlatModuleName += ".";
-      FlatModuleName += Piece.first->getName();
-    }
-    SourceLocation FirstPathLoc = NamedModuleImportPath[0].second;
-    NamedModuleImportPath.clear();
-    NamedModuleImportPath.push_back(
-        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
-  }
-
   Module *Imported = nullptr;
   // We don't/shouldn't load the standard c++20 modules when preprocessing.
   if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
@@ -1330,6 +1479,33 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
   return true;
 }
 
+/// Lex a token following the 'module' contextual keyword.
+///
+/// [cpp.module]/p2:
+/// The pp-tokens, if any, of a pp-module shall be of the form:
+///       pp-module-name pp-module-partition[opt] pp-tokens[opt]
+///
+/// where the pp-tokens (if any) shall not begin with a ( preprocessing token
+/// and the grammar non-terminals are defined as:
+///       pp-module-name:
+///             pp-module-name-qualifier[opt] identifier
+///       pp-module-partition:
+///             : pp-module-name-qualifier[opt] identifier
+///       pp-module-name-qualifier:
+///             identifier .
+///             pp-module-name-qualifier identifier .
+/// No identifier in the pp-module-name or pp-module-partition shall currently
+/// be defined as an object-like macro.
+///
+/// [cpp.module]/p3:
+/// Any preprocessing tokens after the module preprocessing token in the module
+/// directive are processed just as in normal text.
+bool Preprocessor::LexAfterModuleDecl(Token &Result) {
+  // Refresh the current lexer kind, then lex the module name (not an import).
+  recomputeCurLexerKind();
+  return LexModuleName(Result, /*IsImport=*/false);
+}
+
 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
   CurSubmoduleState->VisibleModules.setVisible(
       M, Loc, [](Module *) {},
diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp
index 865879d1805336..cdb636923b9e91 100644
--- a/clang/lib/Lex/TokenConcatenation.cpp
+++ b/clang/lib/Lex/TokenConcatenation.cpp
@@ -160,6 +160,13 @@ static char GetFirstChar(const Preprocessor &PP, const Token &Tok) {
 bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
                                      const Token &PrevTok,
                                      const Token &Tok) const {
+  // If the previous token is a module name, no whitespace is needed between it
+  // and the current token; otherwise an extra space would be emitted between
+  // 'M' and ';' for the following code:
+  //
+  // import M;
+  if (PrevTok.is(tok::annot_module_name))
+    return false;
   // Conservatively assume that every annotation token that has a printable
   // form requires whitespace.
   if (PrevTok.isAnnotation())
@@ -190,6 +197,9 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
       return true;
     ConcatInfo &= ~aci_avoid_equal;
   }
+
+  if (Tok.is(tok::annot_module_name))
+    return true;
   if (Tok.isAnnotation()) {
     // Modules annotation can show up when generated automatically for includes.
     assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin,
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 4a2d9a650e20cc..713e32b1a313f3 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4002,7 +4002,13 @@ void Parser::ParseDeclarationSpecifiers(
 
       // We're done with the declaration-specifiers.
       goto DoneWithDeclSpec;
-
+    case tok::annot_module_name: {
+      PP.EnterTokenStream(
+          Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(),
+          /*DisableMacroExpansion=*/true, /*IsReinject=*/false);
+      ConsumeAnyToken();
+      [[fallthrough]];
+    }
       // typedef-name
     case tok::kw___super:
     case tok::kw_decltype:
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 5ebe71e496a2e8..afb2e1e4161682 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -2511,18 +2511,28 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) {
   }
 
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
-  if (ParseModuleName(ModuleLoc, Path, /*IsImport*/ false))
+  if (Tok.isNot(tok::annot_module_name)) {
+    Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/false;
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return nullptr;
+  }
+
+  auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>();
+  ConsumeAnnotationToken();
+  if (ParseModuleName(ModuleLoc, Info->getModuleName(), Path,
+                      /*IsImport=*/false))
     return nullptr;
 
   // Parse the optional module-partition.
   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Partition;
-  if (Tok.is(tok::colon)) {
-    SourceLocation ColonLoc = ConsumeToken();
+  if (Info->hasPartitionName()) {
+    SourceLocation ColonLoc = Info->getColonToken().getLocation();
     if (!getLangOpts().CPlusPlusModules)
       Diag(ColonLoc, diag::err_unsupported_module_partition)
           << SourceRange(ColonLoc, Partition.back().second);
     // Recover by ignoring the partition name.
-    else if (ParseModuleName(ModuleLoc, Partition, /*IsImport*/ false))
+    else if (ParseModuleName(ModuleLoc, Info->getPartitionName(), Partition,
+                             /*IsImport=*/false))
       return nullptr;
   }
 
@@ -2581,18 +2591,32 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
     // This is a header import that the preprocessor mapped to a module import.
     HeaderUnit = reinterpret_cast<Module *>(Tok.getAnnotationValue());
     ConsumeAnnotationToken();
-  } else if (Tok.is(tok::colon)) {
-    SourceLocation ColonLoc = ConsumeToken();
-    if (!getLangOpts().CPlusPlusModules)
-      Diag(ColonLoc, diag::err_unsupported_module_partition)
-          << SourceRange(ColonLoc, Path.back().second);
-    // Recover by leaving partition empty.
-    else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true))
-      return nullptr;
-    else
-      IsPartition = true;
   } else {
-    if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true))
+    if (Tok.isNot(tok::annot_module_name)) {
+      if (Tok.is(tok::code_completion)) {
+        cutOffParsing();
+        Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path);
+        return nullptr;
+      }
+      Diag(Tok, diag::err_module_expected_ident) << /*IsImport=*/true;
+      SkipUntil(tok::semi, StopBeforeMatch);
+      return nullptr;
+    }
+    auto *Info = Tok.getAnnotationValueAs<ModuleNameInfo *>();
+    ConsumeAnnotationToken();
+    if (Info->hasPartitionName()) {
+      SourceLocation ColonLoc = Info->getColonToken().getLocation();
+      if (!getLangOpts().CPlusPlusModules)
+        Diag(ColonLoc, diag::err_unsupported_module_partition)
+            << SourceRange(ColonLoc, Path.back().second);
+      // Recover by leaving partition empty.
+      else if (ParseModuleName(ColonLoc, Info->getPartitionName(), Path,
+                               /*IsImport=*/true))
+        return nullptr;
+      else
+        IsPartition = true;
+    } else if (ParseModuleName(ImportLoc, Info->getModuleName(), Path,
+                               /*IsImport=*/true))
       return nullptr;
   }
 
@@ -2689,32 +2713,31 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
 ///         module-name-qualifier:
 ///           module-name-qualifier[opt] identifier '.'
 bool Parser::ParseModuleName(
-    SourceLocation UseLoc,
+    SourceLocation UseLoc, ArrayRef<Token> ModuleName,
     SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
     bool IsImport) {
-  // Parse the module path.
-  while (true) {
-    if (!Tok.is(tok::identifier)) {
-      if (Tok.is(tok::code_completion)) {
-        cutOffParsing();
-        Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path);
-        return true;
-      }
-
-      Diag(Tok, diag::err_module_expected_ident) << IsImport;
-      SkipUntil(tok::semi);
+  ModuleNameInfo::getModuleIdPath(ModuleName, Path);
+  // E.g. 'import A.B.' (the name does not end with an identifier).
+  if (ModuleName.back().isNot(tok::identifier)) {
+    if (Tok.is(tok::code_completion)) {
+      cutOffParsing();
+      Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path);
       return true;
     }
+    Diag(ModuleName.back(), diag::err_module_expected_ident) << IsImport;
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return true;
+  }
 
-    // Record this part of the module path.
-    Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
-    ConsumeToken();
-
-    if (Tok.isNot(tok::period))
-      return false;
-
-    ConsumeToken();
+  // [cpp.module]/p2: where the pp-tokens (if any) shall not begin with a (
+  // preprocessing token [...]
+  //
+  // Skip until ';' to recover.
+  if (getLangOpts().CPlusPlusModules && Tok.is(tok::l_paren)) {
+    SkipUntil(tok::semi, StopBeforeMatch);
+    return true;
   }
+  return false;
 }
 
 /// Try recover parser when module annotation appears where it must not
diff --git a/clang/test/CXX/cpp/cpp.module/p2.cppm b/clang/test/CXX/cpp/cpp.module/p2.cppm
new file mode 100644
index 00000000000000..966a88ccfa972d
--- /dev/null
+++ b/clang/test/CXX/cpp/cpp.module/p2.cppm
@@ -0,0 +1,88 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/D.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/E.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/F.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/G.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/H.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/I.cppm -triple x86_64-linux-gnu -verify
+// RUN: %clang_cc1 -std=c++20 %t/J.cppm -triple x86_64-linux-gnu -verify
+
+//--- version.h
+#ifndef VERSION_H
+#define VERSION_H
+
+#define VERSION libv5
+#define A a
+#define B b
+#define C c
+#define FUNC_LIKE(X) function_like_##X
+#define ATTRS [[]]
+#define SEMICOLON ;
+
+#endif // VERSION_H
+
+//--- A.cppm
+module;
+#include "version.h"
+export module VERSION;  // expected-error {{the module name in a module declaration cannot contain an object-like macro 'VERSION'}}
+
+//--- B.cppm
+module;
+#include "version.h"
+export module A.B;      // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                        // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}}
+
+//--- C.cppm
+module;                             // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(foo):C;   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                    // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- D.cppm
+module;                               // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module B.A.FUNC_LIKE(bar):C;   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'B'}} \
+                                      // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                      // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- E.cppm
+module;
+#include "version.h"
+export module a.FUNC_LIKE:c; // OK, FUNC_LIKE would not be treated as a macro name.
+// expected-no-diagnostics
+
+//--- F.cppm
+module;
+#include "version.h"
+export module a.FUNC_LIKE:c ATTRS; // OK, FUNC_LIKE would not be treated as a macro name.
+// expected-no-diagnostics
+
+//--- G.cppm
+module;                               // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B c:C ATTRS // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                      // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- H.cppm
+module;                                   // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B,). c:C ATTRS  // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                          // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- I.cppm
+module;                                   // expected-error {{missing 'module' declaration at end of global module fragment introduced here}}
+#include "version.h"
+export module A.FUNC_LIKE(B,) c:C ATTRS   // expected-error {{the module name in a module declaration cannot contain an object-like macro 'A'}} \
+                                          // expected-error {{unexpected '(' after the module name in a module declaration}}
+
+//--- J.cppm
+module;
+#include "version.h"
+export module unexpanded : unexpanded ATTRS SEMICOLON // OK, ATTRS and SEMICOLON can be expanded.
+// expected-no-diagnostics
diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
index d71358cc7a571f..14bbc911febfcd 100644
--- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
+++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp
@@ -8,27 +8,19 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm
 //
 // Module implementation for unknown and known module. (The former is ill-formed.)
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \
-// RUN:            -DTEST=1 -DEXPORT= -DMODULE_NAME=z
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M.cpp \
-// RUN:            -DTEST=2 -DEXPORT= -DMODULE_NAME=x
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M1.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x=%t/x.pcm -fmodule-file=x.y=%t/x.y.pcm -verify -x c++ %t/M2.cpp
 //
 // Module interface for unknown and known module. (The latter is ill-formed due to
 // redefinition.)
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=3 -DEXPORT=export -DMODULE_NAME=z
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=4 -DEXPORT=export -DMODULE_NAME=x
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M3.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M4.cpp
 //
 // Miscellaneous syntax.
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=7 -DEXPORT=export -DMODULE_NAME='z elderberry'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=8 -DEXPORT=export -DMODULE_NAME='z [[]]'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=9 -DEXPORT=export -DMODULE_NAME='z [[fancy]]'
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M.cpp \
-// RUN:            -DTEST=10 -DEXPORT=export -DMODULE_NAME='z [[maybe_unused]]'
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M5.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M6.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M7.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -verify %t/M8.cpp
 
 //--- x.cppm
 export module x;
@@ -38,17 +30,26 @@ int a, b;
 export module x.y;
 int c;
 
-//--- M.cpp
-
-EXPORT module MODULE_NAME;
-#if TEST == 7
-// expected-error at -2 {{expected ';'}} expected-error at -2 {{a type specifier is required}}
-#elif TEST == 9
-// expected-warning at -4 {{unknown attribute 'fancy' ignored}}
-#elif TEST == 10
-// expected-error-re at -6 {{'maybe_unused' attribute cannot be applied to a module{{$}}}}
-#elif TEST == 1
-// expected-error at -8 {{module 'z' not found}}
-#else
-// expected-no-diagnostics
-#endif
+//--- M1.cpp
+module z; // expected-error {{module 'z' not found}}
+
+//--- M2.cpp
+module x; // expected-no-diagnostics
+
+//--- M3.cpp
+export module z; // expected-no-diagnostics
+
+//--- M4.cpp
+export module x; // expected-no-diagnostics
+
+//--- M5.cpp
+export module z elderberry; // expected-error {{expected ';'}} expected-error {{a type specifier is required}}
+
+//--- M6.cpp
+export module z [[]]; // expected-no-diagnostics
+
+//--- M7.cpp
+export module z [[fancy]]; // expected-warning {{unknown attribute 'fancy' ignored}}
+
+//--- M8.cpp
+export module z [[maybe_unused]]; // expected-error-re {{'maybe_unused' attribute cannot be applied to a module{{$}}}}
diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
index 873e4c0edeac25..ecad4db32a7e94 100644
--- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
+++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
@@ -6,10 +6,12 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface -fmodule-file=x=%t/x.pcm %t/x.y.cppm -o %t/x.y.pcm
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.b.cppm -o %t/a.b.pcm
 //
-// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.cpp \
-// RUN:            -DMODULE_NAME=z -DINTERFACE
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test-interface.cpp \
+// RUN:            -DINTERFACE
 // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \
-// RUN:            -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp -DMODULE_NAME=a.b
+// RUN:            -fmodule-file=a.b=%t/a.b.pcm -verify %t/test.cpp
+// RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm \
+// RUN:            -verify %t/test-module-not-found.cpp
 // RUN: %clang_cc1 -std=c++20 -I%t -fmodule-file=x.y=%t/x.y.pcm -fmodule-file=x=%t/x.pcm -verify %t/test.x.cpp
 
 //--- x.cppm
@@ -34,11 +36,8 @@ int use_2 = b; // ok
 int use_3 = c; // expected-error {{use of undeclared identifier 'c'}}
 
 //--- test.cpp
-#ifdef INTERFACE
-export module MODULE_NAME;
-#else
-module MODULE_NAME;
-#endif
+module;
+module a.b;
 
 import x;
 
@@ -51,6 +50,28 @@ import x.y;
 import x.; // expected-error {{expected a module name after 'import'}}
 import .x; // expected-error {{expected a module name after 'import'}}
 
-import blarg; // expected-error {{module 'blarg' not found}}
+int use_4 = c; // ok
+
+
+//--- test-interface.cpp
+module;
+export module z;
+
+import x;
+
+import x [[]];
+import x [[foo]]; // expected-warning {{unknown attribute 'foo' ignored}}
+import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applied to a module import}}
+import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn' ignored}}
+
+import x.y;
+import x.; // expected-error {{expected a module name after 'import'}}
+import .x; // expected-error {{expected a module name after 'import'}}
 
 int use_4 = c; // ok
+
+//--- test-module-not-found.cpp
+module;
+
+import blarg; // expected-error {{module 'blarg' not found}}
+
diff --git a/clang/test/SemaCXX/modules.cppm b/clang/test/SemaCXX/modules.cppm
index 41204be76eafa1..267417bf5da2ca 100644
--- a/clang/test/SemaCXX/modules.cppm
+++ b/clang/test/SemaCXX/modules.cppm
@@ -1,19 +1,17 @@
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.0.pcm -verify -DTEST=0
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -o %t.1.pcm -verify -DTEST=1
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify -DTEST=2
-// RUN:     %clang_cc1 -std=c++20 -emit-module-interface %s -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar -DTEST=3
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
 
-#if TEST == 0 || TEST == 2
-// expected-no-diagnostics
-#endif
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/A.cppm -o %t.0.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/B.cppm -o %t.1.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/C.cppm -fmodule-file=foo=%t.0.pcm -o %t.2.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/D.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/E.cppm -fmodule-file=foo=%t.0.pcm -o %t.3.pcm -verify -Dfoo=bar
 
+//--- A.cppm
 export module foo;
-
 static int m;
-
 int n;
-
-#if TEST == 0
 export {
   int a;
   int b;
@@ -27,7 +25,43 @@ export void f() {}
 
 export struct T {
 } t;
-#elif TEST == 3
+// expected-no-diagnostics
+
+//--- B.cppm
+export module foo;
+static int m;
+int n;
+struct S {
+  export int n;        // expected-error {{expected member name or ';'}}
+  export static int n; // expected-error {{expected member name or ';'}}
+};
+
+// FIXME: Exports of declarations without external linkage are disallowed.
+// Exports of declarations with non-external-linkage types are disallowed.
+
+// Cannot export within another export. This isn't precisely covered by the
+// language rules right now, but (per personal correspondence between zygoloid
+// and gdr) is the intent.
+export { // expected-note {{export block begins here}}
+  extern "C++" {
+    namespace NestedExport {
+      export { // expected-error {{export declaration appears within another export declaration}}
+        int q;
+      }
+    } // namespace NestedExport
+  }
+}
+
+//--- C.cppm
+export module foo;
+static int m;
+int n;
+// expected-no-diagnostics
+
+//--- D.cppm
+export module foo;
+static int m;
+int n;
 int use_a = a; // expected-error {{use of undeclared identifier 'a'}}
 
 #undef foo
@@ -46,29 +80,12 @@ int use_n = n; // FIXME: this should not be visible, because it is not exported
 
 extern int n;
 static_assert(&n != p); // expected-error{{use of undeclared identifier 'p'}}
-#endif
 
-#if TEST == 1
-struct S {
-  export int n;        // expected-error {{expected member name or ';'}}
-  export static int n; // expected-error {{expected member name or ';'}}
-};
-#endif
-
-// FIXME: Exports of declarations without external linkage are disallowed.
-// Exports of declarations with non-external-linkage types are disallowed.
+//--- E.cppm
+export module foo; // expected-error {{the module name in a module declaration cannot contain an object-like macro 'foo'}}
+static int m;
+int n;
+int use_a = a; // expected-error {{use of undeclared identifier 'a'}}
 
-// Cannot export within another export. This isn't precisely covered by the
-// language rules right now, but (per personal correspondence between zygoloid
-// and gdr) is the intent.
-#if TEST == 1
-export { // expected-note {{export block begins here}}
-  extern "C++" {
-  namespace NestedExport {
-  export { // expected-error {{export declaration appears within another export declaration}}
-    int q;
-  }
-  } // namespace NestedExport
-  }
-}
-#endif
+#undef foo
+import foo; // expected-error {{imports must immediately follow the module declaration}}
diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index a6ded8be3ae9e5..8fe05b21146282 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -182,7 +182,7 @@ <h2 id="cxx26">C++2c implementation status</h2>
  <tr>
   <td>Module Declarations Shouldn’t be Macros</td>
   <td><a href="https://wg21.link/P3034R1">P3034R1</a> (<a href="#dr">DR</a>)</td>
-  <td class="none" align="center">No</td>
+  <td class="unreleased" align="center">Clang 20</td>
  </tr>
  <tr>
   <td>Trivial infinite loops are not Undefined Behavior</td>

>From e25019f69e7131c8990ce81e36b97267690f2038 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Mon, 5 Aug 2024 22:47:33 +0800
Subject: [PATCH 2/6] [Clang] Add peekNextPPToken, makes peek next token
 without side-effects

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Lex/Lexer.h        | 10 ++++----
 clang/include/clang/Lex/Preprocessor.h |  8 ++++++-
 clang/include/clang/Lex/TokenLexer.h   |  7 +++---
 clang/lib/Lex/Lexer.cpp                | 21 +++++++++--------
 clang/lib/Lex/PPMacroExpansion.cpp     | 32 ++++++++++++--------------
 clang/lib/Lex/TokenLexer.cpp           | 10 ++++----
 6 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index b6ecc7e5ded9e2..1e665c13b392f2 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer {
   //===--------------------------------------------------------------------===//
   // Context that changes as the file is lexed.
   // NOTE: any state that mutates when in raw mode must have save/restore code
-  // in Lexer::isNextPPTokenLParen.
+  // in Lexer::peekNextPPToken.
 
   // BufferPtr - Current pointer into the buffer.  This is the next character
   // to be lexed.
@@ -629,10 +629,10 @@ class Lexer : public PreprocessorLexer {
     BufferPtr = TokEnd;
   }
 
-  /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a
-  /// tok::l_paren token, 0 if it is something else and 2 if there are no more
-  /// tokens in the buffer controlled by this lexer.
-  unsigned isNextPPTokenLParen();
+  /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+  /// buffer controlled by this lexer, otherwise return the next unexpanded
+  /// token.
+  std::optional<Token> peekNextPPToken();
 
   //===--------------------------------------------------------------------===//
   // Lexer character reading interfaces.
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index c35b768ffea1bc..85712c1248fe12 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2649,10 +2649,16 @@ class Preprocessor {
 
   void removeCachedMacroExpandedTokensOfLastLexer();
 
+  /// Peek at the next token. Return it if there is one, std::nullopt if not;
+  /// this method should have no observable side-effect on the lexed tokens.
+  std::optional<Token> peekNextPPToken();
+
   /// Determine whether the next preprocessor token to be
   /// lexed is a '('.  If so, consume the token and return true, if not, this
   /// method should have no observable side-effect on the lexed tokens.
-  bool isNextPPTokenLParen();
+  bool isNextPPTokenLParen() {
+    return peekNextPPToken().value_or(Token{}).is(tok::l_paren);
+  }
 
   /// After reading "MACRO(", this method is invoked to read all of the formal
   /// arguments specified for the macro invocation.  Returns null on error.
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 4d229ae6106743..777b4e6266c714 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -139,10 +139,9 @@ class TokenLexer {
   void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
             bool OwnsTokens, bool IsReinject);
 
-  /// If the next token lexed will pop this macro off the
-  /// expansion stack, return 2.  If the next unexpanded token is a '(', return
-  /// 1, otherwise return 0.
-  unsigned isNextTokenLParen() const;
+  /// If the next token lexed will pop this macro off the expansion stack,
+  /// return std::nullopt, otherwise return the next unexpanded token.
+  std::optional<Token> peekNextPPToken() const;
 
   /// Lex and return a token from this macro stream.
   bool Lex(Token &Tok);
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index ef1e1f4bd9aeb4..af533b3874cf5d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3193,18 +3193,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
   return PP->HandleEndOfFile(Result, isPragmaLexer());
 }
 
-/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
-/// the specified lexer will return a tok::l_paren token, 0 if it is something
-/// else and 2 if there are no more tokens in the buffer controlled by the
-/// lexer.
-unsigned Lexer::isNextPPTokenLParen() {
+/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
+/// buffer controlled by this lexer, otherwise return the next unexpanded
+/// token.
+std::optional<Token> Lexer::peekNextPPToken() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
   if (isDependencyDirectivesLexer()) {
     if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
-      return 2;
-    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
-        tok::l_paren);
+      return std::nullopt;
+    Token Result;
+    (void)convertDependencyDirectiveToken(
+        DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
+    return Result;
   }
 
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
@@ -3233,8 +3234,8 @@ unsigned Lexer::isNextPPTokenLParen() {
   LexingRawMode = false;
 
   if (Tok.is(tok::eof))
-    return 2;
-  return Tok.is(tok::l_paren);
+    return std::nullopt;
+  return Tok;
 }
 
 /// Find the end of a version control conflict marker.
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 1e31fcc3d731ed..0fadaeb6c2c681 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -437,42 +437,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
   return !llvm::is_contained(MI->params(), II);
 }
 
-/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
-/// lexed is a '('.  If so, consume the token and return true, if not, this
-/// method should have no observable side-effect on the lexed tokens.
-bool Preprocessor::isNextPPTokenLParen() {
+/// peekNextPPToken - Return the next preprocessor token to be lexed, or
+/// std::nullopt if none; this has no observable side-effect on lexed tokens.
+std::optional<Token> Preprocessor::peekNextPPToken() {
   // Do some quick tests for rejection cases.
-  unsigned Val;
+  std::optional<Token> Val;
   if (CurLexer)
-    Val = CurLexer->isNextPPTokenLParen();
+    Val = CurLexer->peekNextPPToken();
   else
-    Val = CurTokenLexer->isNextTokenLParen();
+    Val = CurTokenLexer->peekNextPPToken();
 
-  if (Val == 2) {
+  if (!Val) {
     // We have run off the end.  If it's a source file we don't
     // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
     // macro stack.
     if (CurPPLexer)
-      return false;
+      return std::nullopt;
     for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) {
       if (Entry.TheLexer)
-        Val = Entry.TheLexer->isNextPPTokenLParen();
+        Val = Entry.TheLexer->peekNextPPToken();
       else
-        Val = Entry.TheTokenLexer->isNextTokenLParen();
+        Val = Entry.TheTokenLexer->peekNextPPToken();
 
-      if (Val != 2)
+      if (Val)
         break;
 
       // Ran off the end of a source file?
       if (Entry.ThePPLexer)
-        return false;
+        return std::nullopt;
     }
   }
 
-  // Okay, if we know that the token is a '(', lex it and return.  Otherwise we
-  // have found something that isn't a '(' or we found the end of the
-  // translation unit.  In either case, return false.
-  return Val == 1;
+  // Return the token we found, or std::nullopt if we reached the end of the
+  // translation unit.
+  return Val;
 }
 
 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 856d5682727fe3..0eca09ef93da92 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -922,13 +922,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
 }
 
 /// isNextTokenLParen - If the next token lexed will pop this macro off the
-/// expansion stack, return 2.  If the next unexpanded token is a '(', return
-/// 1, otherwise return 0.
-unsigned TokenLexer::isNextTokenLParen() const {
+/// expansion stack, return std::nullopt, otherwise return the next unexpanded
+/// token.
+std::optional<Token> TokenLexer::peekNextPPToken() const {
   // Out of tokens?
   if (isAtEnd())
-    return 2;
-  return Tokens[CurTokenIdx].is(tok::l_paren);
+    return std::nullopt;
+  return Tokens[CurTokenIdx];
 }
 
 /// isParsingPreprocessorDirective - Return true if we are in the middle of a

>From 829b1c1296148c6ca201a0d7d7951cc762d322ed Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Mon, 5 Aug 2024 22:48:25 +0800
Subject: [PATCH 3/6] [Clang] Add IsCurrentLexingTokAtPhysicalStartOfLine in
 Lexer to avoid passing this flag as an argument

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Lex/Lexer.h | 13 +++++----
 clang/lib/Lex/Lexer.cpp         | 47 +++++++++++++++------------------
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 1e665c13b392f2..142f3f05381298 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -136,6 +136,8 @@ class Lexer : public PreprocessorLexer {
 
   bool IsAtPhysicalStartOfLine;
 
+  bool IsCurrentLexingTokAtPhysicalStartOfLine;
+
   bool HasLeadingSpace;
 
   bool HasLeadingEmptyMacro;
@@ -609,7 +611,7 @@ class Lexer : public PreprocessorLexer {
   /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
   /// by Lex.
   ///
-  bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
+  bool LexTokenInternal(Token &Result);
 
   bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
 
@@ -749,12 +751,9 @@ class Lexer : public PreprocessorLexer {
   bool LexCharConstant       (Token &Result, const char *CurPtr,
                               tok::TokenKind Kind);
   bool LexEndOfFile          (Token &Result, const char *CurPtr);
-  bool SkipWhitespace        (Token &Result, const char *CurPtr,
-                              bool &TokAtPhysicalStartOfLine);
-  bool SkipLineComment       (Token &Result, const char *CurPtr,
-                              bool &TokAtPhysicalStartOfLine);
-  bool SkipBlockComment      (Token &Result, const char *CurPtr,
-                              bool &TokAtPhysicalStartOfLine);
+  bool SkipWhitespace        (Token &Result, const char *CurPtr);
+  bool SkipLineComment       (Token &Result, const char *CurPtr);
+  bool SkipBlockComment      (Token &Result, const char *CurPtr);
   bool SaveLineComment       (Token &Result, const char *CurPtr);
 
   bool IsStartOfConflictMarker(const char *CurPtr);
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index af533b3874cf5d..160a60fb63e3d9 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2484,8 +2484,7 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 /// Update BufferPtr to point to the next non-whitespace character and return.
 ///
 /// This method forms a token and returns true if KeepWhitespaceMode is enabled.
-bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
-                           bool &TokAtPhysicalStartOfLine) {
+bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   // Whitespace - Skip it, then return the token after the whitespace.
   bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
 
@@ -2541,7 +2540,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
   Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
   if (SawNewline) {
     Result.setFlag(Token::StartOfLine);
-    TokAtPhysicalStartOfLine = true;
+    IsCurrentLexingTokAtPhysicalStartOfLine = true;
 
     if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP) {
       if (auto *Handler = PP->getEmptylineHandler())
@@ -2560,8 +2559,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
-bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
-                            bool &TokAtPhysicalStartOfLine) {
+bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
   // If Line comments aren't explicitly enabled for this language, emit an
   // extension warning.
   if (!LineComment) {
@@ -2717,7 +2715,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
 
   // The next returned token is at the start of the line.
   Result.setFlag(Token::StartOfLine);
-  TokAtPhysicalStartOfLine = true;
+  IsCurrentLexingTokAtPhysicalStartOfLine = true;
   // No leading whitespace seen so far.
   Result.clearFlag(Token::LeadingSpace);
   BufferPtr = CurPtr;
@@ -2842,8 +2840,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L,
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
-bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
-                             bool &TokAtPhysicalStartOfLine) {
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
   // Scan one character past where we should, looking for a '/' character.  Once
   // we find it, check to see if it was preceded by a *.  This common
   // optimization helps people who like to put a lot of * characters in their
@@ -3046,7 +3043,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
   // have already returned above with the comment as a token.
   if (isHorizontalWhitespace(*CurPtr)) {
-    SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
+    SkipWhitespace(Result, CurPtr + 1);
     return false;
   }
 
@@ -3698,11 +3695,11 @@ bool Lexer::Lex(Token &Result) {
     HasLeadingEmptyMacro = false;
   }
 
-  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+  IsCurrentLexingTokAtPhysicalStartOfLine = IsAtPhysicalStartOfLine;
   IsAtPhysicalStartOfLine = false;
   bool isRawLex = isLexingRawMode();
   (void) isRawLex;
-  bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
+  bool returnedToken = LexTokenInternal(Result);
   // (After the LexTokenInternal call, the lexer might be destroyed.)
   assert((returnedToken || !isRawLex) && "Raw lex must succeed");
   return returnedToken;
@@ -3713,7 +3710,7 @@ bool Lexer::Lex(Token &Result) {
 /// has a null character at the end of the file.  This returns a preprocessing
 /// token, not a normal token, as such, it is an internal interface.  It assumes
 /// that the Flags of result have been cleared before calling this.
-bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
+bool Lexer::LexTokenInternal(Token &Result) {
 LexStart:
   assert(!Result.needsCleaning() && "Result needs cleaning");
   assert(!Result.hasPtrData() && "Result has not been reset");
@@ -3766,7 +3763,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     if (!isLexingRawMode())
       Diag(CurPtr-1, diag::null_in_file);
     Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+    if (SkipWhitespace(Result, CurPtr))
       return true; // KeepWhitespaceMode
 
     // We know the lexer hasn't changed, so just try again with this lexer.
@@ -3812,7 +3809,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // No leading whitespace seen so far.
     Result.clearFlag(Token::LeadingSpace);
 
-    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+    if (SkipWhitespace(Result, CurPtr))
       return true; // KeepWhitespaceMode
 
     // We only saw whitespace, so just try again with this lexer.
@@ -3824,7 +3821,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
   case '\v':
   SkipHorizontalWhitespace:
     Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+    if (SkipWhitespace(Result, CurPtr))
       return true; // KeepWhitespaceMode
 
   SkipIgnoredUnits:
@@ -3834,11 +3831,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     // too (without going through the big switch stmt).
     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
         LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
-      if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+      if (SkipLineComment(Result, CurPtr + 2))
         return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
-      if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+      if (SkipBlockComment(Result, CurPtr + 2))
         return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (isHorizontalWhitespace(*CurPtr)) {
@@ -4150,8 +4147,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
           TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
 
       if (TreatAsComment) {
-        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
-                            TokAtPhysicalStartOfLine))
+        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
           return true; // There is a token to return.
 
         // It is common for the tokens immediately after a // comment to be
@@ -4162,8 +4158,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     }
 
     if (Char == '*') {  // /**/ comment.
-      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
-                           TokAtPhysicalStartOfLine))
+      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
         return true; // There is a token to return.
 
       // We only saw whitespace, so just try again with this lexer.
@@ -4203,7 +4198,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
         // it's actually the start of a preprocessing directive.  Callback to
         // the preprocessor to handle it.
         // TODO: -fpreprocessed mode??
-        if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
+        if (IsCurrentLexingTokAtPhysicalStartOfLine && !LexingRawMode &&
+            !Is_PragmaLexer)
           goto HandleDirective;
 
         Kind = tok::hash;
@@ -4392,7 +4388,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
       // it's actually the start of a preprocessing directive.  Callback to
       // the preprocessor to handle it.
       // TODO: -fpreprocessed mode??
-      if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
+      if (IsCurrentLexingTokAtPhysicalStartOfLine && !LexingRawMode &&
+          !Is_PragmaLexer)
         goto HandleDirective;
 
       Kind = tok::hash;
@@ -4412,7 +4409,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
     if (!LangOpts.AsmPreprocessor) {
       if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
         if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-          if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+          if (SkipWhitespace(Result, CurPtr))
             return true; // KeepWhitespaceMode
 
           // We only saw whitespace, so just try again with this lexer.
@@ -4445,7 +4442,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
                                   llvm::strictConversion);
     if (Status == llvm::conversionOK) {
       if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
-        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+        if (SkipWhitespace(Result, CurPtr))
           return true; // KeepWhitespaceMode
 
         // We only saw whitespace, so just try again with this lexer.

>From 581160a121fca5796bff2392d9228207d37e6f16 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Tue, 6 Aug 2024 01:09:56 +0800
Subject: [PATCH 4/6] [Clang] Partially implement P1857R3: Modules
 Dependency Discovery

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Basic/IdentifierTable.h |   3 +-
 clang/include/clang/Lex/Preprocessor.h      | 172 ++++++++++----------
 clang/include/clang/Lex/Token.h             |   4 +
 clang/lib/Lex/Lexer.cpp                     |  24 +++
 clang/lib/Lex/Preprocessor.cpp              | 124 ++++++++------
 clang/lib/Parse/ParseDecl.cpp               |   7 -
 clang/lib/Parse/Parser.cpp                  |  34 ++--
 clang/test/CXX/basic/basic.link/p3.cpp      |  13 +-
 clang/test/CXX/lex/lex.pptoken/p3-2a.cpp    |  11 +-
 9 files changed, 219 insertions(+), 173 deletions(-)

diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index f40f74d0355ade..e8b9e381a5b4c3 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -585,7 +585,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   void RecomputeNeedsHandleIdentifier() {
     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
                             isExtensionToken() || isFutureCompatKeyword() ||
-                            isOutOfDate() || isModulesImport();
+                            isOutOfDate() || isModulesImport() ||
+                            isModulesDeclaration();
   }
 };
 
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 85712c1248fe12..f4c28b7cf6a54e 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -109,6 +109,77 @@ class TokenValue {
   }
 };
 
+/// Represents a module or partition name token sequence.
+///
+///     module-name:
+///           module-name-qualifier[opt] identifier
+///
+///     partition-name: [C++20]
+///           : module-name-qualifier[opt] identifier
+///
+///     module-name-qualifier
+///           module-name-qualifier[opt] identifier .
+///
+/// This class can only be created by the preprocessor and guarantees that the
+/// two source arrays are contiguous in memory and contain only 3 kinds of
+/// tokens (identifier, '.' and ':'). It is only available when the preprocessor
+/// returns an annot_module_name token.
+///
+/// For example:
+///
+/// export module m.n:c.d
+///
+/// The module name array has 3 tokens ['m', '.', 'n'].
+/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
+///
+/// When importing a partition in a named module fragment (e.g. import :part1;),
+/// the module name array will be empty, and the partition name array has 2
+/// tokens.
+///
+/// When we meet a private-module-fragment (e.g. module :private;), the
+/// preprocessor will not return an annot_module_name token, but will return 2
+/// separate tokens [':', 'kw_private'].
+
+class ModuleNameInfo {
+  friend class Preprocessor;
+  ArrayRef<Token> ModuleName;
+  ArrayRef<Token> PartitionName;
+
+  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
+
+public:
+  /// Return the contiguous token array.
+  ArrayRef<Token> getTokens() const {
+    if (ModuleName.empty())
+      return PartitionName;
+    if (PartitionName.empty())
+      return ModuleName;
+    return ArrayRef(ModuleName.begin(), PartitionName.end());
+  }
+  bool hasModuleName() const { return !ModuleName.empty(); }
+  bool hasPartitionName() const { return !PartitionName.empty(); }
+  ArrayRef<Token> getModuleName() const { return ModuleName; }
+  ArrayRef<Token> getPartitionName() const { return PartitionName; }
+  Token getColonToken() const {
+    assert(hasPartitionName() && "Do not have a partition name");
+    return getPartitionName().front();
+  }
+
+  /// Under the standard C++ Modules, the dot is just part of the module name,
+  /// and not a real hierarchy separator. Flatten such module names now.
+  std::string getFlatName() const;
+
+  /// Build a module id path from the contiguous token array, including both
+  /// the module name and the partition name.
+  void getModuleIdPath(
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
+
+  /// Build a module id path from \p ModuleName.
+  static void getModuleIdPath(
+      ArrayRef<Token> ModuleName,
+      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
+};
+
 /// Context in which macro name is used.
 enum MacroUse {
   // other than #define or #undef
@@ -337,6 +408,9 @@ class Preprocessor {
   /// Whether the last token we lexed was an '@'.
   bool LastTokenWasAt = false;
 
+  /// The last token we lexed, if it was an 'export' keyword.
+  std::optional<Token> LastTokenWasExportKeyword = std::nullopt;
+
   /// A position within a C++20 import-seq.
   class StdCXXImportSeq {
   public:
@@ -540,24 +614,12 @@ class Preprocessor {
         reset();
     }
 
-    void handleIdentifier(IdentifierInfo *Identifier) {
-      if (isModuleCandidate() && Identifier)
-        Name += Identifier->getName().str();
-      else if (!isNamedModule())
-        reset();
-    }
-
-    void handleColon() {
-      if (isModuleCandidate())
-        Name += ":";
-      else if (!isNamedModule())
-        reset();
-    }
-
-    void handlePeriod() {
-      if (isModuleCandidate())
-        Name += ".";
-      else if (!isNamedModule())
+    void handleModuleName(Token ModuleName) {
+      assert(ModuleName.is(tok::annot_module_name) && "Expect a module name");
+      if (isModuleCandidate()) {
+        Name = ModuleName.getAnnotationValueAs<ModuleNameInfo *>()
+                   ->getFlatName();
+      } else if (!isNamedModule())
         reset();
     }
 
@@ -2328,6 +2390,8 @@ class Preprocessor {
   /// token stream.
   bool HandleEndOfTokenLexer(Token &Result);
 
+  bool HandleModuleContextualKeyword(Token &Result);
+
   /// Callback invoked when the lexer sees a # token at the start of a
   /// line.
   ///
@@ -3098,78 +3162,6 @@ struct EmbedAnnotationData {
 
 /// Registry of pragma handlers added by plugins
 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
-
-/// Represents module or partition name token sequance.
-///
-///     module-name:
-///           module-name-qualifier[opt] identifier
-///
-///     partition-name: [C++20]
-///           : module-name-qualifier[opt] identifier
-///
-///     module-name-qualifier
-///           module-name-qualifier[opt] identifier .
-///
-/// This class can only be created by the preprocessor and guarantees that the
-/// two source array being contiguous in memory and only contains 3 kind of
-/// tokens (identifier, '.' and ':'). And only available when the preprocessor
-/// returns annot_module_name token.
-///
-/// For exmaple:
-///
-/// export module m.n:c.d
-///
-/// The module name array has 3 tokens ['m', '.', 'n'].
-/// The partition name array has 4 tokens [':', 'c', '.', 'd'].
-///
-/// When import a partition in a named module fragment (Eg. import :part1;),
-/// the module name array will be empty, and the partition name array has 2
-/// tokens.
-///
-/// When we meet a private-module-fragment (Eg. module :private;), preprocessor
-/// will not return a annot_module_name token, but will return 2 separate tokens
-/// [':', 'kw_private'].
-
-class ModuleNameInfo {
-  friend class Preprocessor;
-  ArrayRef<Token> ModuleName;
-  ArrayRef<Token> PartitionName;
-
-  ModuleNameInfo(ArrayRef<Token> AnnotToks, std::optional<unsigned> ColonIndex);
-
-public:
-  /// Return the contiguous token array.
-  ArrayRef<Token> getTokens() const {
-    if (ModuleName.empty())
-      return PartitionName;
-    if (PartitionName.empty())
-      return ModuleName;
-    return ArrayRef(ModuleName.begin(), PartitionName.end());
-  }
-  bool hasModuleName() const { return !ModuleName.empty(); }
-  bool hasPartitionName() const { return !PartitionName.empty(); }
-  ArrayRef<Token> getModuleName() const { return ModuleName; }
-  ArrayRef<Token> getPartitionName() const { return PartitionName; }
-  Token getColonToken() const {
-    assert(hasPartitionName() && "Do not have a partition name");
-    return getPartitionName().front();
-  }
-
-  /// Under the standard C++ Modules, the dot is just part of the module name,
-  /// and not a real hierarchy separator. Flatten such module names now.
-  std::string getFlatName() const;
-
-  /// Build a module id path from the contiguous token array, both include
-  /// module name and partition name.
-  void getModuleIdPath(
-      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path) const;
-
-  /// Build a module id path from \param ModuleName.
-  static void getModuleIdPath(
-      ArrayRef<Token> ModuleName,
-      SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path);
-};
-
 } // namespace clang
 
 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
index 2be3ad39529f05..8400ab7ed07e2a 100644
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -292,6 +292,10 @@ class Token {
   /// Return the ObjC keyword kind.
   tok::ObjCKeywordKind getObjCKeywordID() const;
 
+  /// Return true if we have a C++20 Modules contextual keyword (export, import
+  /// or module).
+  bool isModuleContextualKeyword() const;
+
   bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
 
   /// Return true if this token has trigraphs or escaped newlines in it.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 160a60fb63e3d9..ffb82fa46984a6 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -74,6 +74,17 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
   return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
 }
 
+/// Return true if we have a C++20 Modules contextual keyword (export, import
+/// or module).
+bool Token::isModuleContextualKeyword() const {
+  if (is(tok::kw_export))
+    return true;
+  if (isNot(tok::identifier))
+    return false;
+  const auto *II = getIdentifierInfo();
+  return II->isModulesImport() || II->isModulesDeclaration();
+}
+
 /// Determine whether the token kind starts a simple-type-specifier.
 bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
   switch (getKind()) {
@@ -2003,6 +2014,13 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
     return true;
   }
 
+  if (Result.isModuleContextualKeyword()) {
+    Result.setFlagValue(Token::StartOfLine,
+                        IsCurrentLexingTokAtPhysicalStartOfLine);
+    if (PP->HandleModuleContextualKeyword(Result))
+      return true;
+  }
+
   // Finally, now that we know we have an identifier, pass this off to the
   // preprocessor, which may macro expand it or something.
   if (II->isHandleIdentifierCase())
@@ -4556,6 +4574,12 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {
     Result.setRawIdentifierData(TokPtr);
     if (!isLexingRawMode()) {
       const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+      if (Result.isModuleContextualKeyword()) {
+        // Result.setFlagValue(Token::StartOfLine,
+        //                     IsCurrentLexingTokAtPhysicalStartOfLine);
+        if (PP->HandleModuleContextualKeyword(Result))
+          return true;
+      }
       if (II->isHandleIdentifierCase())
         return PP->HandleIdentifier(Result);
     }
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index d0c4ab8fd5b669..7f7e6d2f2fe535 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -863,12 +863,6 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
     CurLexerCallback = CLK_LexAfterModuleImport;
   }
 
-  if ((II.isModulesDeclaration() || Identifier.is(tok::kw_module)) &&
-      !InMacroArgs && !DisableMacroExpansion &&
-      (getLangOpts().CPlusPlusModules || getLangOpts().DebuggerSupport) &&
-      CurLexerCallback != CLK_CachingLexer) {
-    CurLexerCallback = CLK_LexAfterModuleDecl;
-  }
   return true;
 }
 
@@ -926,55 +920,22 @@ void Preprocessor::Lex(Token &Result) {
       StdCXXImportSeqState.handleExport();
       ModuleDeclState.handleExport();
       break;
-    case tok::annot_module_name: {
-      auto *Info = static_cast<ModuleNameInfo *>(Result.getAnnotationValue());
-      for (const auto &Tok : Info->getTokens()) {
-        switch (Tok.getKind()) {
-        case tok::identifier:
-          ModuleDeclState.handleIdentifier(Tok.getIdentifierInfo());
-          break;
-        case tok::period:
-          ModuleDeclState.handlePeriod();
-          break;
-        case tok::colon:
-          ModuleDeclState.handleColon();
-          break;
-        default:
-          llvm_unreachable("Unexpected token in module name");
-        }
-      }
-      if (ModuleDeclState.isModuleCandidate())
-        break;
-      TrackGMFState.handleMisc();
-      StdCXXImportSeqState.handleMisc();
-      ModuleDeclState.handleMisc();
+    case tok::kw_module:
+      TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
+      ModuleDeclState.handleModule();
       break;
-    }
-    case tok::identifier:
-      // Check "import" and "module" when there is no open bracket. The two
-      // identifiers are not meaningful with open brackets.
+    case tok::kw_import:
       if (StdCXXImportSeqState.atTopLevel()) {
-        if (Result.getIdentifierInfo()->isModulesImport()) {
-          TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
-          StdCXXImportSeqState.handleImport();
-          if (StdCXXImportSeqState.afterImportSeq()) {
-            ModuleImportLoc = Result.getLocation();
-            NamedModuleImportPath.clear();
-            IsAtImport = false;
-            CurLexerCallback = CLK_LexAfterModuleImport;
-          }
-          break;
-        }
-        if (Result.getIdentifierInfo()->isModulesDeclaration()) {
-          TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq());
-          ModuleDeclState.handleModule();
-          CurLexerCallback = CLK_LexAfterModuleDecl;
-          break;
-        }
+        TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq());
+        StdCXXImportSeqState.handleImport();
       }
-      if (ModuleDeclState.isModuleCandidate())
-        break;
-      [[fallthrough]];
+      ModuleImportLoc = Result.getLocation();
+      NamedModuleImportPath.clear();
+      IsAtImport = false;
+      break;
+    case tok::annot_module_name:
+      ModuleDeclState.handleModuleName(Result);
+      break;
     default:
       TrackGMFState.handleMisc();
       StdCXXImportSeqState.handleMisc();
@@ -989,6 +950,8 @@ void Preprocessor::Lex(Token &Result) {
   }
 
   LastTokenWasAt = Result.is(tok::at);
+  if (Result.isNot(tok::kw_export))
+    LastTokenWasExportKeyword.reset();
   --LexLevel;
 
   if ((LexLevel == 0 || PreprocessToken) &&
@@ -1012,6 +975,63 @@ void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
   }
 }
 
+/// P1857R3: Modules Dependency Discovery
+///
+/// At the start of phase 4 an import or module token is treated as starting a
+/// directive and is converted to its respective keyword iff:
+///   • After skipping horizontal whitespace, it is
+///     • at the start of a logical line, or
+///     • preceded by an 'export' at the start of the logical line.
+///   • It is followed by an identifier pp token (before macro expansion), or
+///     • <, ", or : (but not ::) pp tokens for 'import', or
+///     • ; for 'module'
+/// Otherwise the token is treated as an identifier.
+bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
+  if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword() ||
+      InMacroArgs || DisableMacroExpansion ||
+      CurLexerCallback == CLK_CachingLexer)
+    return false;
+
+  if (Result.is(tok::kw_export)) {
+    LastTokenWasExportKeyword = Result;
+    return false;
+  }
+
+  if (LastTokenWasExportKeyword) {
+    if (!LastTokenWasExportKeyword->isAtStartOfLine())
+      return false;
+    // [cpp.pre]/1.4:
+    //   export       // not a preprocessing directive
+    //   import foo;  // preprocessing directive
+    //                // (ill-formed at phase 7)
+    if (Result.isAtStartOfLine())
+      return false;
+  } else if (!Result.isAtStartOfLine())
+    return false;
+
+  // Peek next token.
+  auto NextTok = peekNextPPToken().value_or(Token{});
+  if (Result.getIdentifierInfo()->isModulesImport() &&
+      NextTok.isOneOf(tok::raw_identifier, tok::less, tok::string_literal,
+                      tok::colon)) {
+    Result.setKind(tok::kw_import);
+    ModuleImportLoc = Result.getLocation();
+    IsAtImport = false;
+    CurLexerCallback = CLK_LexAfterModuleImport;
+    return true;
+  }
+  if (Result.getIdentifierInfo()->isModulesDeclaration() &&
+      NextTok.isOneOf(tok::raw_identifier, tok::colon, tok::semi)) {
+    Result.setKind(tok::kw_module);
+    NamedModuleImportPath.clear();
+    CurLexerCallback = CLK_LexAfterModuleDecl;
+    return true;
+  }
+
+  // Ok, it's an identifier.
+  return false;
+}
+
 /// Lex a header-name token (including one formed from header-name-tokens if
 /// \p AllowMacroExpansion is \c true).
 ///
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 713e32b1a313f3..8893c89b3d5b93 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4002,13 +4002,6 @@ void Parser::ParseDeclarationSpecifiers(
 
       // We're done with the declaration-specifiers.
       goto DoneWithDeclSpec;
-    case tok::annot_module_name: {
-      PP.EnterTokenStream(
-          Tok.getAnnotationValueAs<ModuleNameInfo *>()->getTokens(),
-          /*DisableMacroExpansion=*/true, /*IsReinject=*/false);
-      ConsumeAnyToken();
-      [[fallthrough]];
-    }
       // typedef-name
     case tok::kw___super:
     case tok::kw_decltype:
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index afb2e1e4161682..443f16b5c0da93 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -642,6 +642,8 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
 
   case tok::kw_export:
     switch (NextToken().getKind()) {
+    case tok::kw_import:
+      goto import_decl;
     case tok::kw_module:
       goto module_decl;
 
@@ -652,14 +654,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     // Recognize context-sensitive C++20 'export module' and 'export import'
     // declarations.
     case tok::identifier: {
-      IdentifierInfo *II = NextToken().getIdentifierInfo();
-      if ((II == Ident_module || II == Ident_import) &&
-          GetLookAheadToken(2).isNot(tok::coloncolon)) {
-        if (II == Ident_module)
-          goto module_decl;
-        else
-          goto import_decl;
-      }
+      // IdentifierInfo *II = NextToken().getIdentifierInfo();
+      // if ((II == Ident_module || II == Ident_import) &&
+      //     GetLookAheadToken(2).isNot(tok::coloncolon)) {
+      //   if (II == Ident_module)
+      //     goto module_decl;
+      //   else
+      //     goto import_decl;
+      // }
       break;
     }
 
@@ -736,14 +738,14 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     //   A token sequence beginning with 'export[opt] module' or
     //   'export[opt] import' and not immediately followed by '::'
     //   is never interpreted as the declaration of a top-level-declaration.
-    if ((Tok.getIdentifierInfo() == Ident_module ||
-         Tok.getIdentifierInfo() == Ident_import) &&
-        NextToken().isNot(tok::coloncolon)) {
-      if (Tok.getIdentifierInfo() == Ident_module)
-        goto module_decl;
-      else
-        goto import_decl;
-    }
+    // if ((Tok.getIdentifierInfo() == Ident_module ||
+    //      Tok.getIdentifierInfo() == Ident_import) &&
+    //     NextToken().isNot(tok::coloncolon)) {
+    //   if (Tok.getIdentifierInfo() == Ident_module)
+    //     goto module_decl;
+    //   else
+    //     goto import_decl;
+    // }
     break;
 
   default:
diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp
index 23f39d11b655a6..8793162d8957e3 100644
--- a/clang/test/CXX/basic/basic.link/p3.cpp
+++ b/clang/test/CXX/basic/basic.link/p3.cpp
@@ -6,6 +6,8 @@ module;
 
 #if IMPORT_ERROR != 2
 struct import { struct inner {}; };
+#else
+// expected-no-diagnostics
 #endif
 struct module { struct inner {}; };
 
@@ -24,9 +26,9 @@ template<> struct import<n> {
   static X y;
 };
 
-// This is not valid because the 'import <n>' is a pp-import, even though it
-// grammatically can't possibly be an import declaration.
-struct X {} import<n>::y; // expected-error {{'n' file not found}}
+// Well-formed since P1857R3: Modules Dependency Discovery (https://wg21.link/p1857r3);
+// it grammatically can't possibly be an import declaration.
+struct X {} import<n>::y;
 
 #else
 module y = {}; // expected-error {{multiple module declarations}} expected-error 2{{}}
@@ -38,9 +40,10 @@ ::module y = {};
 import::inner xi = {};
 module::inner yi = {};
 
+// Ill-formed since P1857R3: Modules Dependency Discovery (https://wg21.link/p1857r3).
 namespace N {
-  module a;
-  import b;
+  module a; // expected-error {{module declaration can only appear at the top level}}
+  import b; // expected-error {{module 'b' not found}}
 }
 
 extern "C++" module cxxm;
diff --git a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
index 0e0e5fec6e9d8b..dcde61aa4620c0 100644
--- a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
+++ b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp
@@ -15,7 +15,11 @@ import <foo  bar>;
 // CHECK: import <foo  bar>;
 import <foo  bar>;
 
-// CHECK: foo; import <foo  bar>;
+// Since P1857R3, this is an invalid import directive, so 'import' is treated
+// as an identifier. Also, <foo  bar> will not be a tok::header_name, but will
+// be 4 separate tokens.
+//
+// CHECK: foo; import <foo bar>;
 foo; import <foo  bar>;
 
 // CHECK: foo import <foo bar>;
@@ -45,7 +49,10 @@ export export import <foo  bar>;
 import <foo  bar>;
 
 UNBALANCED_PAREN
-// CHECK: import <foo bar>;
+// Since P1857R3, this is an invalid import directive. '<foo  bar>' will be treated as
+// a tok::header_name rather than 4 separate tokens.
+
+// CHECK: import <foo  bar>;
 import <foo  bar>;
 )
 

>From fd71e8ca298d808e6a2a76db77562daa3a09ce8f Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 15 Aug 2024 23:01:24 +0800
Subject: [PATCH 5/6] [Clang] Fix test and remove dead code

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Lex/Preprocessor.h        |  4 +-
 clang/include/clang/Parse/Parser.h            |  4 --
 clang/lib/Parse/Parser.cpp                    | 23 ---------
 .../dcl.module/dcl.module.import/p1.cppm      |  6 ++-
 .../test/CXX/module/dcl.dcl/dcl.module/p1.cpp | 51 ++++++++++++++-----
 5 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index f4c28b7cf6a54e..c6f1709aa874eb 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -617,8 +617,8 @@ class Preprocessor {
     void handleModuleName(Token ModuleName) {
       assert(ModuleName.is(tok::annot_module_name) && "Expect a module name");
       if (isModuleCandidate()) {
-        Name = ModuleName.getAnnotationValueAs<ModuleNameInfo *>()
-                   ->getFlatName();
+        Name =
+            ModuleName.getAnnotationValueAs<ModuleNameInfo *>()->getFlatName();
       } else if (!isNamedModule())
         reset();
     }
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 31c48f8805f4dc..7c9c569b9d2e46 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -165,10 +165,6 @@ class Parser : public CodeCompletionHandler {
   mutable IdentifierInfo *Ident_GNU_final;
   mutable IdentifierInfo *Ident_override;
 
-  // C++2a contextual keywords.
-  mutable IdentifierInfo *Ident_import;
-  mutable IdentifierInfo *Ident_module;
-
   // C++ type trait keywords that can be reverted to identifiers and still be
   // used as type traits.
   llvm::SmallDenseMap<IdentifierInfo *, tok::TokenKind> RevertibleTypeTraits;
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 443f16b5c0da93..a0cf79417ea0a0 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -514,8 +514,6 @@ void Parser::Initialize() {
   Ident_abstract = nullptr;
   Ident_override = nullptr;
   Ident_GNU_final = nullptr;
-  Ident_import = nullptr;
-  Ident_module = nullptr;
 
   Ident_super = &PP.getIdentifierTable().get("super");
 
@@ -571,11 +569,6 @@ void Parser::Initialize() {
     PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block);
   }
 
-  if (getLangOpts().CPlusPlusModules) {
-    Ident_import = PP.getIdentifierInfo("import");
-    Ident_module = PP.getIdentifierInfo("module");
-  }
-
   Actions.Initialize();
 
   // Prime the lexer look-ahead.
@@ -732,22 +725,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     Actions.ActOnEndOfTranslationUnit();
     //else don't tell Sema that we ended parsing: more input might come.
     return true;
-
-  case tok::identifier:
-    // C++2a [basic.link]p3:
-    //   A token sequence beginning with 'export[opt] module' or
-    //   'export[opt] import' and not immediately followed by '::'
-    //   is never interpreted as the declaration of a top-level-declaration.
-    // if ((Tok.getIdentifierInfo() == Ident_module ||
-    //      Tok.getIdentifierInfo() == Ident_import) &&
-    //     NextToken().isNot(tok::coloncolon)) {
-    //   if (Tok.getIdentifierInfo() == Ident_module)
-    //     goto module_decl;
-    //   else
-    //     goto import_decl;
-    // }
-    break;
-
   default:
     break;
   }
diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
index ecad4db32a7e94..1c67be2ff774f3 100644
--- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
+++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm
@@ -48,7 +48,8 @@ import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn'
 
 import x.y;
 import x.; // expected-error {{expected a module name after 'import'}}
-import .x; // expected-error {{expected a module name after 'import'}}
+import .x; // expected-error {{unknown type name 'import'}} \
+           // expected-error {{cannot use dot operator on a type}}
 
 int use_4 = c; // ok
 
@@ -66,7 +67,8 @@ import x [[blarg::noreturn]]; // expected-warning {{unknown attribute 'noreturn'
 
 import x.y;
 import x.; // expected-error {{expected a module name after 'import'}}
-import .x; // expected-error {{expected a module name after 'import'}}
+import .x; // expected-error {{unknown type name 'import'}} \
+           // expected-error {{cannot use dot operator on a type}}
 
 int use_4 = c; // ok
 
diff --git a/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp b/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp
index db86b5dd34c380..e12a0106cb2f4b 100644
--- a/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp
+++ b/clang/test/CXX/module/dcl.dcl/dcl.module/p1.cpp
@@ -1,14 +1,37 @@
-// RUN: %clang_cc1 -std=c++20 -verify %s -DFOO=export -DBAR=export
-// RUN: %clang_cc1 -std=c++20 -verify %s -DFOO=export -DBAR=
-// RUN: %clang_cc1 -std=c++20 %s -DFOO=export -emit-module-interface -o %t
-// RUN: %clang_cc1 -std=c++20 %s -fmodule-file=foo=%t -DFOO=
-// RUN: %clang_cc1 -std=c++20 %s -fmodule-file=foo=%t -DBAR=export
-// RUN: %clang_cc1 -std=c++20 -verify %s -fmodule-file=foo=%t -DFOO= -DBAR=export
-
-#ifdef FOO
-FOO module foo; // expected-note {{previous module declaration is here}}
-#endif
-
-#ifdef BAR
-BAR module bar; // expected-error {{translation unit contains multiple module declarations}}
-#endif
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -std=c++20 -verify %t/A.cpp
+// RUN: %clang_cc1 -std=c++20 -verify %t/B.cpp
+// RUN: %clang_cc1 -std=c++20 %t/C.cpp -emit-module-interface -o %t/C.pcm
+// RUN: %clang_cc1 -std=c++20 %t/D.cpp -fmodule-file=foo=%t/C.pcm
+// RUN: %clang_cc1 -std=c++20 %t/E.cpp -fmodule-file=foo=%t/C.pcm
+// RUN: %clang_cc1 -std=c++20 -verify %t/F.cpp -fmodule-file=foo=%t/C.pcm
+
+//--- A.cpp
+module;
+export module foo; // expected-note {{previous module declaration is here}}
+export module bar; // expected-error {{translation unit contains multiple module declarations}}
+
+//--- B.cpp
+module;
+export module foo;  // expected-note {{previous module declaration is here}}
+module bar;         // expected-error {{translation unit contains multiple module declarations}}
+
+//--- C.cpp
+module;
+export module foo;
+
+//--- D.cpp
+module;
+module foo;
+
+//--- E.cpp
+module;
+export module bar;
+
+//--- F.cpp
+module;
+module foo; // expected-note {{previous module declaration is here}}
+export module bar; // expected-error {{translation unit contains multiple module declarations}}

>From f0eb14527a75fa321d682358f61a4b5648880018 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 15 Aug 2024 23:19:33 +0800
Subject: [PATCH 6/6] [Clang] format

Signed-off-by: yronglin <yronglin777 at gmail.com>
---
 clang/include/clang/Lex/Lexer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 142f3f05381298..4777609f17a351 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -751,9 +751,9 @@ class Lexer : public PreprocessorLexer {
   bool LexCharConstant       (Token &Result, const char *CurPtr,
                               tok::TokenKind Kind);
   bool LexEndOfFile          (Token &Result, const char *CurPtr);
-  bool SkipWhitespace        (Token &Result, const char *CurPtr);
-  bool SkipLineComment       (Token &Result, const char *CurPtr);
-  bool SkipBlockComment      (Token &Result, const char *CurPtr);
+  bool SkipWhitespace(Token &Result, const char *CurPtr);
+  bool SkipLineComment(Token &Result, const char *CurPtr);
+  bool SkipBlockComment(Token &Result, const char *CurPtr);
   bool SaveLineComment       (Token &Result, const char *CurPtr);
 
   bool IsStartOfConflictMarker(const char *CurPtr);