[clang] af2d11b - [C++20][Modules] Implement include translation.

Iain Sandoe via cfe-commits cfe-commits at lists.llvm.org
Sun Jul 10 03:07:21 PDT 2022


Author: Iain Sandoe
Date: 2022-07-10T11:06:51+01:00
New Revision: af2d11b1d5c1508b506825df460656e0151cd3b0

URL: https://github.com/llvm/llvm-project/commit/af2d11b1d5c1508b506825df460656e0151cd3b0
DIFF: https://github.com/llvm/llvm-project/commit/af2d11b1d5c1508b506825df460656e0151cd3b0.diff

LOG: [C++20][Modules] Implement include translation.

This addresses [cpp.include]/7

(when encountering #include header-name)

If the header identified by the header-name denotes an importable header, it
is implementation-defined whether the #include preprocessing directive is
instead replaced by an import directive.

In this implementation, include translation is performed _only_ for headers
in the Global Module fragment, so:
```
module;
 #include "will-be-translated.h" // IFF the header unit is available.

export module M;
 #include "will-not-be-translated.h" // even if the header unit is available
```
The reasoning is that, in general, includes in the module purview would not
be validly translatable (they would have to immediately follow the module
decl and without any other intervening decls).  Otherwise that would violate
the rules on contiguous import directives.

This would be quite complex to track in the preprocessor, and for relatively
little gain (the user can 'import "will-not-be-translated.h";' instead.)

TODO: This is one area where it becomes increasingly difficult to disambiguate
clang modules in C++ from C++ standard modules.  That needs to be addressed in
both the driver and the FE.

Differential Revision: https://reviews.llvm.org/D128981

Added: 
    clang/test/Modules/cxx20-include-translation.cpp

Modified: 
    clang/include/clang/Lex/Preprocessor.h
    clang/lib/Lex/PPDirectives.cpp
    clang/lib/Lex/Preprocessor.cpp
    clang/lib/Parse/Parser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index d130aba3ee3a2..8fc24c7310358 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -385,6 +385,7 @@ class Preprocessor {
 
     bool atTopLevel() { return S <= 0; }
     bool afterImportSeq() { return S == AfterImportSeq; }
+    bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
 
   private:
     State S;
@@ -397,6 +398,67 @@ class Preprocessor {
   /// Our current position within a C++20 import-seq.
   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
 
+  /// Track whether we are in a Global Module Fragment
+  class TrackGMF {
+  public:
+    enum GMFState : int {
+      GMFActive = 1,
+      MaybeGMF = 0,
+      BeforeGMFIntroducer = -1,
+      GMFAbsentOrEnded = -2,
+    };
+
+    TrackGMF(GMFState S) : S(S) {}
+
+    /// Saw a semicolon.
+    void handleSemi() {
+      // If it is immediately after the first instance of the module keyword,
+      // then that introduces the GMF.
+      if (S == MaybeGMF)
+        S = GMFActive;
+    }
+
+    /// Saw an 'export' identifier.
+    void handleExport() {
+      // The presence of an 'export' keyword always ends or excludes a GMF.
+      S = GMFAbsentOrEnded;
+    }
+
+    /// Saw an 'import' identifier.
+    void handleImport(bool AfterTopLevelTokenSeq) {
+      // If we see this before any 'module' kw, then we have no GMF.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw a 'module' identifier.
+    void handleModule(bool AfterTopLevelTokenSeq) {
+      // This was the first module identifier and not preceded by any token
+      // that would exclude a GMF.  It could begin a GMF, but only if directly
+      // followed by a semicolon.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = MaybeGMF;
+      else
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw any other token.
+    void handleMisc() {
+      // We saw something other than ; after the 'module' kw, so not a GMF.
+      if (S == MaybeGMF)
+        S = GMFAbsentOrEnded;
+    }
+
+    bool inGMF() { return S == GMFActive; }
+
+  private:
+    /// Track the transitions into and out of a Global Module Fragment,
+    /// if one is present.
+    GMFState S;
+  };
+
+  TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
+
   /// Whether the module import expects an identifier next. Otherwise,
   /// it expects a '.' or ';'.
   bool ModuleImportExpectsIdentifier = false;
@@ -2414,6 +2476,7 @@ class Preprocessor {
       None,
       ModuleBegin,
       ModuleImport,
+      HeaderUnitImport,
       SkippedModuleImport,
       Failure,
     } Kind;

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 4dcef01e3e4c4..352e1f2178193 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1983,6 +1983,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_begin, Action.ModuleForHeader);
     break;
+  case ImportAction::HeaderUnitImport:
+    EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
+                         Action.ModuleForHeader);
+    break;
   case ImportAction::ModuleImport:
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_include, Action.ModuleForHeader);
@@ -2191,6 +2195,17 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // known to have no effect beyond its effect on module visibility -- that is,
   // if it's got an include guard that is already defined, set to Import if it
   // is a modular header we've already built and should import.
+
+  // For C++20 Modules
+  // [cpp.include]/7 If the header identified by the header-name denotes an
+  // importable header, it is implementation-defined whether the #include
+  // preprocessing directive is instead replaced by an import directive.
+  // For this implementation, the translation is permitted when we are parsing
+  // the Global Module Fragment, and not otherwise (the cases where it would be
+  // valid to replace an include with an import are highly constrained once in
+  // named module purview; this choice avoids considerable complexity in
+  // determining valid cases).
+
   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
 
   if (PPOpts->SingleFileParseMode)
@@ -2203,13 +2218,34 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
       alreadyIncluded(*File))
     Action = IncludeLimitReached;
 
+  bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&
+                               !isForModuleBuilding(SuggestedModule.getModule(),
+                                                    getLangOpts().CurrentModule,
+                                                    getLangOpts().ModuleName);
+
+  // FIXME: We do not have a good way to disambiguate C++ clang modules from
+  // C++ standard modules (other than use/non-use of Header Units).
+  Module *SM = SuggestedModule.getModule();
+  // Maybe a usable Header Unit
+  bool UsableHeaderUnit = false;
+  if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
+    if (TrackGMFState.inGMF() || IsImportDecl)
+      UsableHeaderUnit = true;
+    else if (!IsImportDecl) {
+      // This is a Header Unit that we do not include-translate
+      SuggestedModule = ModuleMap::KnownHeader();
+      SM = nullptr;
+    }
+  }
+  // Maybe a usable clang header module.
+  bool UsableHeaderModule =
+      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
+      !SM->isHeaderUnit();
+
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
-      !isForModuleBuilding(SuggestedModule.getModule(),
-                           getLangOpts().CurrentModule,
-                           getLangOpts().ModuleName)) {
+  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
@@ -2226,7 +2262,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // FIXME: Should we have a second loadModule() overload to avoid this
     // extra lookup step?
     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
-    for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
+    for (Module *Mod = SM; Mod; Mod = Mod->Parent)
       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                     FilenameTok.getLocation()));
     std::reverse(Path.begin(), Path.end());
@@ -2293,9 +2329,12 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
   if (Action == Enter && File &&
-      !HeaderInfo.ShouldEnterIncludeFile(
-          *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
-          SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
+      !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
+                                         EnterOnce, getLangOpts().Modules, SM,
+                                         IsFirstIncludeOfFile)) {
+    // C++ standard modules:
+    // If we are not in the GMF, then we textually include only
+    // clang modules:
     // Even if we've already preprocessed this header once and know that we
     // don't need to see its contents again, we still need to import it if it's
     // modular because we might not have imported it from this submodule before.
@@ -2303,7 +2342,10 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // FIXME: We don't do this when compiling a PCH because the AST
     // serialization layer can't cope with it. This means we get local
     // submodule visibility semantics wrong in that case.
-    Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
+    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
+      Action = TrackGMFState.inGMF() ? Import : Skip;
+    else
+      Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
   }
 
   // Check for circular inclusion of the main file.
@@ -2440,8 +2482,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   switch (Action) {
   case Skip:
     // If we don't need to enter the file, stop now.
-    if (Module *M = SuggestedModule.getModule())
-      return {ImportAction::SkippedModuleImport, M};
+    if (SM)
+      return {ImportAction::SkippedModuleImport, SM};
     return {ImportAction::None};
 
   case IncludeLimitReached:
@@ -2451,16 +2493,15 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
 
   case Import: {
     // If this is a module import, make it visible if needed.
-    Module *M = SuggestedModule.getModule();
-    assert(M && "no module to import");
+    assert(SM && "no module to import");
 
-    makeModuleVisible(M, EndLoc);
+    makeModuleVisible(SM, EndLoc);
 
     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
         tok::pp___include_macros)
       return {ImportAction::None};
 
-    return {ImportAction::ModuleImport, M};
+    return {ImportAction::ModuleImport, SM};
   }
 
   case Enter:
@@ -2492,13 +2533,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     return {ImportAction::None};
 
   // Determine if we're switching to building a new submodule, and which one.
-  if (auto *M = SuggestedModule.getModule()) {
-    if (M->getTopLevelModule()->ShadowingModule) {
+  // This does not apply for C++20 modules header units.
+  if (SM && !SM->isHeaderUnit()) {
+    if (SM->getTopLevelModule()->ShadowingModule) {
       // We are building a submodule that belongs to a shadowed module. This
       // means we find header files in the shadowed module.
-      Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
-        << M->getFullModuleName();
-      Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
+      Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
+          << SM->getFullModuleName();
+      Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
            diag::note_previous_definition);
       return {ImportAction::None};
     }
@@ -2511,22 +2553,22 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // that PCH, which means we should enter the submodule. We need to teach
     // the AST serialization layer to deal with the resulting AST.
     if (getLangOpts().CompilingPCH &&
-        isForModuleBuilding(M, getLangOpts().CurrentModule,
+        isForModuleBuilding(SM, getLangOpts().CurrentModule,
                             getLangOpts().ModuleName))
       return {ImportAction::None};
 
     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
-    CurLexerSubmodule = M;
+    CurLexerSubmodule = SM;
 
     // Let the macro handling code know that any future macros are within
     // the new submodule.
-    EnterSubmodule(M, EndLoc, /*ForPragma*/false);
+    EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
 
     // Let the parser know that any future declarations are within the new
     // submodule.
     // FIXME: There's no point doing this if we're handling a #__include_macros
     // directive.
-    return {ImportAction::ModuleBegin, M};
+    return {ImportAction::ModuleBegin, SM};
   }
 
   assert(!IsImportDecl && "failed to diagnose missing module for import decl");

diff  --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 281f01fb28a40..5310db3c882bc 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -941,6 +941,9 @@ void Preprocessor::Lex(Token &Result) {
 
   // Update ImportSeqState to track our position within a C++20 import-seq
   // if this token is being produced as a result of phase 4 of translation.
+  // Update TrackGMFState to decide if we are currently in a Global Module
+  // Fragment. GMF state updates should precede ImportSeq ones, since GMF state
+  // depends on the prevailing ImportSeq state in two cases.
   if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
       !Result.getFlag(Token::IsReinjected)) {
     switch (Result.getKind()) {
@@ -953,7 +956,11 @@ void Preprocessor::Lex(Token &Result) {
     case tok::r_brace:
       ImportSeqState.handleCloseBrace();
       break;
+    // This token is injected to represent the translation of '#include "a.h"'
+    // into "import a.h;". Mimic the notional ';'.
+    case tok::annot_module_include:
     case tok::semi:
+      TrackGMFState.handleSemi();
       ImportSeqState.handleSemi();
       break;
     case tok::header_name:
@@ -961,10 +968,12 @@ void Preprocessor::Lex(Token &Result) {
       ImportSeqState.handleHeaderName();
       break;
     case tok::kw_export:
+      TrackGMFState.handleExport();
       ImportSeqState.handleExport();
       break;
     case tok::identifier:
       if (Result.getIdentifierInfo()->isModulesImport()) {
+        TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());
         ImportSeqState.handleImport();
         if (ImportSeqState.afterImportSeq()) {
           ModuleImportLoc = Result.getLocation();
@@ -973,9 +982,13 @@ void Preprocessor::Lex(Token &Result) {
           CurLexerKind = CLK_LexAfterModuleImport;
         }
         break;
+      } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
+        TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());
+        break;
       }
       LLVM_FALLTHROUGH;
     default:
+      TrackGMFState.handleMisc();
       ImportSeqState.handleMisc();
       break;
     }
@@ -1222,6 +1235,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
       LLVM_FALLTHROUGH;
 
     case ImportAction::ModuleImport:
+    case ImportAction::HeaderUnitImport:
     case ImportAction::SkippedModuleImport:
       // We chose to import (or textually enter) the file. Convert the
       // header-name token into a header unit annotation token.

diff  --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index 6f63d01bc8ad2..ab8748c2c63da 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -663,12 +663,22 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
     return false;
   }
 
-  case tok::annot_module_include:
-    Actions.ActOnModuleInclude(Tok.getLocation(),
-                               reinterpret_cast<Module *>(
-                                   Tok.getAnnotationValue()));
+  case tok::annot_module_include: {
+    auto Loc = Tok.getLocation();
+    Module *Mod = reinterpret_cast<Module *>(Tok.getAnnotationValue());
+    // FIXME: We need a better way to disambiguate C++ clang modules and
+    // standard C++ modules.
+    if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit())
+      Actions.ActOnModuleInclude(Loc, Mod);
+    else {
+      DeclResult Import =
+          Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod);
+      Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get();
+      Result = Actions.ConvertDeclToDeclGroup(ImportDecl);
+    }
     ConsumeAnnotationToken();
     return false;
+  }
 
   case tok::annot_module_begin:
     Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(

diff  --git a/clang/test/Modules/cxx20-include-translation.cpp b/clang/test/Modules/cxx20-include-translation.cpp
new file mode 100644
index 0000000000000..25c6a6d9e8882
--- /dev/null
+++ b/clang/test/Modules/cxx20-include-translation.cpp
@@ -0,0 +1,109 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h1.h -emit-header-unit -o h1.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h2.h -emit-header-unit -o h2.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h3.h -emit-header-unit -o h3.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h4.h -emit-header-unit -o h4.pcm
+
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface -o Xlate.pcm \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm -fsyntax-only -Wauto-import -verify
+
+// Check that we do the intended translation and not more.
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm  -E -undef | FileCheck %s
+
+// We expect no diagnostics here, the used functions should all be available.
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm -fsyntax-only
+
+// The content of the headers is not terribly important, we just want to check
+// whether they are textually included or include-translated.
+//--- h1.h
+#ifndef H1_GUARD
+#define H1_GUARD
+
+#define ONE 1
+
+void foo();
+
+#endif // H1_GUARD
+
+//--- h2.h
+#ifndef H2_GUARD
+#define H2_GUARD
+
+#define TWO 2
+
+void bar();
+
+#endif // H2_GUARD
+
+//--- h3.h
+#ifndef H3_GUARD
+#define H3_GUARD
+
+#define THREE 3
+
+void baz();
+
+#endif // H3_GUARD
+
+//--- h4.h
+#ifndef H4_GUARD
+#define H4_GUARD
+
+#define FOUR 4
+
+void boo();
+
+#endif // H4_GUARD
+
+//--- h5.h
+#ifndef H5_GUARD
+#define H5_GUARD
+
+#define FIVE 5
+
+void five();
+
+#endif // H4_GUARD
+
+//--- Xlate.cpp
+/* some comment ...
+  ... */
+module /*nothing here*/;
+
+// This should be include-translated, when the header unit for h1 is available.
+#include "h1.h" // expected-warning {{treating #include as an import of module './h1.h'}}
+// Import of a header unit is allowed, named modules are not.
+import "h2.h";
+// A regular, untranslated, header
+#include "h5.h"
+
+export module Xlate;
+
+// This is OK, the import immediately follows the module decl.
+import "h3.h";
+
+// This should *not* be include-translated, even if header unit for h4 is
+// available.
+#include "h4.h"
+
+export void charlie() {
+  foo();
+  bar();
+  baz();
+  boo();
+  five();
+}
+
+// CHECK: #pragma clang module import "./h1.h"
+// CHECK: import ./h2.h
+// CHECK: import ./h3.h
+// CHECK-NOT: #pragma clang module import "./h4.h"


        


More information about the cfe-commits mailing list