[clang] [C++20][Modules] Implement P1857R3 Modules Dependency Discovery (PR #107168)

via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 1 10:21:37 PDT 2025


================
@@ -1119,43 +1115,158 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
   return false;
 }
 
+// We represent the primary and partition names as 'Paths', which are sections
+// of the hierarchical access path for a Clang module. For C++20, however, the
+// periods in a name are just another character, so we need to flatten the
+// path into a single string.
+std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
+  std::string Name;
+  if (Path.empty())
+    return Name;
+
+  for (auto &Piece : Path) {
+    assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid());
+    if (!Name.empty())
+      Name += ".";
+    Name += Piece.getIdentifierInfo()->getName();
+  }
+  return Name;
+}
+
+bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
+                                         SmallVectorImpl<Token> &Suffix,
+                                         SmallVectorImpl<IdentifierLoc> &Path,
+                                         bool AllowMacroExpansion) {
+  auto ConsumeToken = [&]() {
+    if (AllowMacroExpansion)
+      Lex(Tok);
+    else
+      LexUnexpandedToken(Tok);
+    Suffix.push_back(Tok);
+  };
+
+  Suffix.push_back(Tok);
+  while (true) {
+    if (Tok.isNot(tok::identifier))
+      return true;
+
+    // Record this part of the module path.
+    Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo());
+    ConsumeToken();
+
+    if (Tok.isNot(tok::period))
+      return false;
+
+    ConsumeToken();
+  }
+}
+
+/// P1857R3: Modules Dependency Discovery
+///
+/// At the start of phase 4, an 'import' or 'module' token is treated as
+/// starting a directive and is converted to its respective keyword iff:
+///   - After skipping horizontal whitespace, it is
+///     - at the start of a logical line, or
+///     - preceded by an 'export' at the start of the logical line.
+///   - It is followed by an identifier pp token (before macro expansion), or
+///     - <, ", or : (but not ::) pp tokens for 'import', or
+///     - ; or : (but not ::) pp tokens for 'module'
+/// Otherwise the token is treated as an identifier.
+bool Preprocessor::HandleModuleContextualKeyword(
+    Token &Result, bool TokAtPhysicalStartOfLine) {
+  if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
+    return false;
+
+  if (Result.is(tok::kw_export)) {
+    LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine};
+    return false;
+  }
+
+  if (LastTokenWasExportKeyword.isValid()) {
+    // The export keyword was not at the start of the line, so this is not a
+    // directive-introducing token.
+    if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine())
+      return false;
+    // [cpp.pre]/1.4
+    //   export        // not a preprocessing directive
+    //   import foo;   // preprocessing directive (ill-formed at phase 7)
+    if (TokAtPhysicalStartOfLine)
+      return false;
+  } else if (!TokAtPhysicalStartOfLine)
+    return false;
+
+  bool SavedParsingPreprocessorDirective =
+      CurPPLexer->ParsingPreprocessorDirective;
+  CurPPLexer->ParsingPreprocessorDirective = true;
+  auto _ = llvm::make_scope_exit([&]() {
+    CurPPLexer->ParsingPreprocessorDirective =
+        SavedParsingPreprocessorDirective;
+  });
+
+  if (Result.getIdentifierInfo()->isModulesImport() &&
+      isNextPPTokenOneOf(tok::raw_identifier, tok::less, tok::string_literal,
+                         tok::colon)) {
+    Result.setKind(tok::kw_import);
+    ModuleImportLoc = Result.getLocation();
+    IsAtImport = false;
+    return true;
+  }
+
+  if (Result.getIdentifierInfo()->isModulesDeclaration() &&
+      isNextPPTokenOneOf(tok::raw_identifier, tok::colon, tok::semi)) {
+    Result.setKind(tok::kw_module);
+    ModuleDeclLoc = Result.getLocation();
+    return true;
+  }
+
+  // Ok, it's an identifier.
+  return false;
+}
+
+bool Preprocessor::CollectPPImportSuffixAndEnterStream(
+    SmallVectorImpl<Token> &Toks, bool StopUntilEOD) {
+  CollectPPImportSuffix(Toks);
+  EnterModuleSuffixTokenStream(Toks);
+  return false;
+}
+
 /// Collect the tokens of a C++20 pp-import-suffix.
-void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
+void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
+                                         bool StopUntilEOD) {
   // FIXME: For error recovery, consider recognizing attribute syntax here
   // and terminating / diagnosing a missing semicolon if we find anything
   // else? (Can we leave that to the parser?)
-  unsigned BracketDepth = 0;
   while (true) {
     Toks.emplace_back();
     Lex(Toks.back());
 
     switch (Toks.back().getKind()) {
-    case tok::l_paren: case tok::l_square: case tok::l_brace:
-      ++BracketDepth;
-      break;
-
-    case tok::r_paren: case tok::r_square: case tok::r_brace:
-      if (BracketDepth == 0)
-        return;
-      --BracketDepth;
-      break;
-
     case tok::semi:
-      if (BracketDepth == 0)
+      if (!StopUntilEOD)
         return;
-    break;
-
+      [[fallthrough]];
+    case tok::eod:
     case tok::eof:
       return;
-
     default:
       break;
     }
   }
 }
 
+// Allocate a holding buffer for a sequence of tokens and introduce it into
+// the token stream.
+void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
+  if (Toks.empty())
+    return;
+  auto ToksCopy = std::make_unique<Token[]>(Toks.size());
+  std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+  EnterTokenStream(std::move(ToksCopy), Toks.size(),
+                   /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+}
 
-/// Lex a token following the 'import' contextual keyword.
+// Lex a token following the 'import' contextual keyword.
----------------
yronglin wrote:

Removed this change.

https://github.com/llvm/llvm-project/pull/107168


More information about the cfe-commits mailing list