[clang] [C++20][Modules] Implement P1857R3 Modules Dependency Discovery (PR #107168)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 1 10:21:37 PDT 2025
================
@@ -1119,43 +1115,158 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
return false;
}
+// Primary and partition module names are represented as 'Paths': sequences of
+// components of the hierarchical access path for a clang module. In C++20 the
+// periods in a name are just another character, so this joins the components
+// of Path with '.' into one flat string; an empty Path yields an empty string.
+std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) {
+ std::string Name;
+ if (Path.empty())
+ return Name;
+
+ for (auto &Piece : Path) {
+ assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid()); // each component must carry an identifier and a valid location
+ if (!Name.empty()) // separator goes between components only, never in front of the first
+ Name += ".";
+ Name += Piece.getIdentifierInfo()->getName();
+ }
+ return Name;
+}
+
+bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc,
+ SmallVectorImpl<Token> &Suffix, // out: every token examined, in order (the incoming Tok first)
+ SmallVectorImpl<IdentifierLoc> &Path, // out: one (location, identifier) entry per name component
+ bool AllowMacroExpansion) { // Lexes `identifier ('.' identifier)*` starting at Tok; UseLoc is not used in this body
+ auto ConsumeToken = [&]() { // advance Tok by one token and record the new token in Suffix
+ if (AllowMacroExpansion)
+ Lex(Tok);
+ else
+ LexUnexpandedToken(Tok);
+ Suffix.push_back(Tok);
+ };
+
+ Suffix.push_back(Tok);
+ while (true) {
+ if (Tok.isNot(tok::identifier))
+ return true; // a name component was expected here but Tok is not an identifier
+
+ // Record this part of the module path, then step past the identifier.
+ Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo());
+ ConsumeToken();
+
+ if (Tok.isNot(tok::period))
+ return false; // name ended; Tok (also the last entry of Suffix) is the first token after it
+
+ ConsumeToken(); // step past the '.'; the next iteration requires another identifier
+ }
+}
+
+/// P1857R3: Modules Dependency Discovery
+///
+/// At the start of phase 4, an 'import' or 'module' token is treated as
+/// starting a directive and is converted to its respective keyword iff it:
+/// - after skipping horizontal whitespace, is
+/// - at the start of a logical line, or
+/// - preceded by an 'export' at the start of the logical line; and
+/// - is followed by an identifier pp token (before macro expansion), or
+/// - <, ", or : (but not ::) pp tokens for 'import', or
+/// - ; for 'module'.
+/// Otherwise the token is treated as an identifier. Returns true iff Result was converted to tok::kw_import or tok::kw_module.
+bool Preprocessor::HandleModuleContextualKeyword(
+ Token &Result, bool TokAtPhysicalStartOfLine) {
+ if (!getLangOpts().CPlusPlusModules || !Result.isModuleContextualKeyword())
+ return false;
+
+ if (Result.is(tok::kw_export)) {
+ LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; // remember the 'export' and whether it began its physical line
+ return false;
+ }
+
+ if (LastTokenWasExportKeyword.isValid()) {
+ // The export keyword was not at the start of line, it's not a
+ // directive-introducing token.
+ if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine())
+ return false;
+ // [cpp.pre]/1.4
+ // export // not a preprocessing directive
+ // import foo; // preprocessing directive (ill-formed at phase
+ // 7)
+ if (TokAtPhysicalStartOfLine) // NOTE(review): this returns false (no conversion) although the example above calls the 'import' line a directive; confirm whether LastTokenWasExportKeyword is reset elsewhere
+ return false;
+ } else if (!TokAtPhysicalStartOfLine) // no recorded 'export': the token itself must begin the physical line
+ return false;
+
+ bool SavedParsingPreprocessorDirective =
+ CurPPLexer->ParsingPreprocessorDirective;
+ CurPPLexer->ParsingPreprocessorDirective = true; // presumably bounds the lookahead below to the current line; TODO confirm
+ auto _ = llvm::make_scope_exit([&]() { // restores the saved flag on every return path below
+ CurPPLexer->ParsingPreprocessorDirective =
+ SavedParsingPreprocessorDirective;
+ });
+
+ if (Result.getIdentifierInfo()->isModulesImport() &&
+ isNextPPTokenOneOf(tok::raw_identifier, tok::less, tok::string_literal,
+ tok::colon)) {
+ Result.setKind(tok::kw_import);
+ ModuleImportLoc = Result.getLocation();
+ IsAtImport = false; // presumably distinguishes this from an '@import' form; verify against the declaration
+ return true;
+ }
+
+ if (Result.getIdentifierInfo()->isModulesDeclaration() &&
+ isNextPPTokenOneOf(tok::raw_identifier, tok::colon, tok::semi)) { // NOTE(review): ':' is accepted here too, although the list above names only ';' for 'module'
+ Result.setKind(tok::kw_module);
+ ModuleDeclLoc = Result.getLocation();
+ return true;
+ }
+
+ // Ok, it's an identifier: Result's kind is left unchanged.
+ return false;
+}
+
+bool Preprocessor::CollectPPImportSuffixAndEnterStream(
+ SmallVectorImpl<Token> &Toks, bool StopUntilEOD) { // Collects a pp-import-suffix into Toks, then re-enters those tokens as a token stream.
+ CollectPPImportSuffix(Toks); // NOTE(review): StopUntilEOD is not forwarded, so the callee's default is used; confirm intent
+ EnterModuleSuffixTokenStream(Toks);
+ return false; // always false in the code shown
+}
+
/// Collect the tokens of a C++20 pp-import-suffix.
-void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
+void Preprocessor::CollectPPImportSuffix(SmallVectorImpl<Token> &Toks,
+ bool StopUntilEOD) { // Lexes tokens into Toks until a terminator is seen; the terminator is appended to Toks as well.
// FIXME: For error recovery, consider recognizing attribute syntax here
// and terminating / diagnosing a missing semicolon if we find anything
// else? (Can we leave that to the parser?)
- unsigned BracketDepth = 0;
while (true) {
Toks.emplace_back();
Lex(Toks.back());
switch (Toks.back().getKind()) {
- case tok::l_paren: case tok::l_square: case tok::l_brace:
- ++BracketDepth;
- break;
-
- case tok::r_paren: case tok::r_square: case tok::r_brace:
- if (BracketDepth == 0)
- return;
- --BracketDepth;
- break;
-
case tok::semi:
- if (BracketDepth == 0)
+ if (!StopUntilEOD) // NOTE(review): both outcomes return, because the fallthrough below also ends in `return`; as written StopUntilEOD does not change where collection stops. Confirm intent.
return;
- break;
-
+ [[fallthrough]]; // reached only when StopUntilEOD is true
+ case tok::eod: // end of directive terminates collection, like eof
case tok::eof:
return;
-
default:
break;
}
}
}
+// Copy Toks into a newly allocated holding buffer and enter that buffer as a
+// token stream, handing ownership of the copy over. No-op when Toks is empty.
+void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef<Token> Toks) {
+ if (Toks.empty())
+ return;
+ auto ToksCopy = std::make_unique<Token[]>(Toks.size()); // private copy so ownership can be passed to EnterTokenStream
+ std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
+ EnterTokenStream(std::move(ToksCopy), Toks.size(),
+ /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
+}
-/// Lex a token following the 'import' contextual keyword.
+// Lex a token following the 'import' contextual keyword.
----------------
yronglin wrote:
Removed this change.
https://github.com/llvm/llvm-project/pull/107168
More information about the cfe-commits
mailing list