[clang] [clang][modules] Track included files per submodule (PR #71117)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 2 14:36:00 PDT 2023


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/71117

None

>From 9debc58d5135fbde51967dfb076d0ec5d954df38 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 2 Nov 2023 14:35:30 -0700
Subject: [PATCH] [clang][modules] Track included files per submodule

---
 clang/include/clang/Basic/Module.h           |  2 +
 clang/include/clang/Lex/Preprocessor.h       | 39 +++++++++++---
 clang/lib/Serialization/ASTReader.cpp        | 52 ++++++++++---------
 clang/lib/Serialization/ASTReaderInternals.h |  3 +-
 clang/lib/Serialization/ASTWriter.cpp        | 39 +++++++++++---
 clang/test/Modules/per-submodule-includes.m  | 53 ++++++++++++++++++++
 6 files changed, 149 insertions(+), 39 deletions(-)
 create mode 100644 clang/test/Modules/per-submodule-includes.m

diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 6a7423938bdb8fa..41f3b4fb46423d6 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -423,6 +423,8 @@ class alignas(8) Module {
   /// to import but didn't because they are not direct uses.
   llvm::SmallSetVector<const Module *, 2> UndeclaredUses;
 
+  llvm::DenseSet<const FileEntry *> Includes;
+
   /// A library or framework to link against when an entity from this
   /// module is used.
   struct LinkLibrary {
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4a99447e757c6ac..fef608efdee09d9 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -982,6 +982,8 @@ class Preprocessor {
     /// The set of modules that are visible within the submodule.
     VisibleModuleSet VisibleModules;
 
+    /// The files that have been included.
+    IncludedFilesSet IncludedFiles;
     // FIXME: CounterValue?
     // FIXME: PragmaPushMacroInfo?
   };
@@ -994,8 +996,8 @@ class Preprocessor {
   /// in a submodule.
   SubmoduleState *CurSubmoduleState;
 
-  /// The files that have been included.
-  IncludedFilesSet IncludedFiles;
+  /// The files that have been included outside of (sub)modules.
+  IncludedFilesSet Includes;
 
   /// The set of top-level modules that affected preprocessing, but were not
   /// imported.
@@ -1484,19 +1486,40 @@ class Preprocessor {
   /// Mark the file as included.
   /// Returns true if this is the first time the file was included.
   bool markIncluded(FileEntryRef File) {
-    HeaderInfo.getFileInfo(File);
-    return IncludedFiles.insert(File).second;
+    bool AlreadyIncluded = alreadyIncluded(File);
+    CurSubmoduleState->IncludedFiles.insert(File);
+    if (!BuildingSubmoduleStack.empty())
+      BuildingSubmoduleStack.back().M->Includes.insert(File);
+    else if (Module *M = getCurrentModule())
+      M->Includes.insert(File);
+    else
+      Includes.insert(File);
+    return !AlreadyIncluded;
   }
 
   /// Return true if this header has already been included.
   bool alreadyIncluded(FileEntryRef File) const {
     HeaderInfo.getFileInfo(File);
-    return IncludedFiles.count(File);
+    if (CurSubmoduleState->IncludedFiles.contains(File))
+      return true;
+    // TODO: Do this more efficiently.
+    for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules())
+      if (CurSubmoduleState->VisibleModules.isVisible(M))
+        if (M->Includes.contains(File))
+          return true;
+    return false;
+  }
+
+  void markIncludedOnTopLevel(const FileEntry *File) {
+    Includes.insert(File);
+    CurSubmoduleState->IncludedFiles.insert(File);
+  }
+
+  void markIncludedInModule(Module *M, const FileEntry *File) {
+    M->Includes.insert(File);
   }
 
-  /// Get the set of included files.
-  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
-  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+  const IncludedFilesSet &getTopLevelIncludes() const { return Includes; }
 
   /// Return the name of the macro defined before \p Loc that has
   /// spelling \p Tokens.  If there are multiple macros with same spelling,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 42b48d230af7a97..9c32dcf3d954761 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1975,19 +1975,15 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
   return LocalID + I->second;
 }
 
-const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
+OptionalFileEntryRefDegradesToFileEntryPtr
+HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
   FileManager &FileMgr = Reader.getFileManager();
-  if (!Key.Imported) {
-    if (auto File = FileMgr.getFile(Key.Filename))
-      return *File;
-    return nullptr;
-  }
+  if (!Key.Imported)
+    return FileMgr.getOptionalFileRef(Key.Filename);
 
   std::string Resolved = std::string(Key.Filename);
   Reader.ResolveImportedPath(M, Resolved);
-  if (auto File = FileMgr.getFile(Resolved))
-    return *File;
-  return nullptr;
+  return FileMgr.getOptionalFileRef(Resolved);
 }
 
 unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -2043,13 +2039,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
 
-  bool Included = (Flags >> 6) & 0x01;
-  if (Included)
-    if (const FileEntry *FE = getFile(key))
-      // Not using \c Preprocessor::markIncluded(), since that would attempt to
-      // deserialize this header file info again.
-      Reader.getPreprocessor().getIncludedFiles().insert(FE);
-
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
   HFI.isImport |= (Flags >> 5) & 0x01;
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -2065,6 +2054,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
     HFI.Framework = HS->getUniqueFrameworkName(FrameworkName);
   }
 
+  OptionalFileEntryRefDegradesToFileEntryPtr FE = getFile(key);
+  Preprocessor &PP = Reader.getPreprocessor();
+  ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+
+  unsigned IncludedCount =
+      endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+  for (unsigned I = 0; I < IncludedCount; ++I) {
+    uint32_t LocalSMID =
+        endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+    if (!FE)
+      continue;
+
+    if (LocalSMID == 0) {
+      PP.markIncludedOnTopLevel(*FE);
+    } else {
+      SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
+      Module *Mod = Reader.getSubmodule(GlobalSMID);
+      PP.markIncludedInModule(Mod, *FE);
+    }
+  }
+
   assert((End - d) % 4 == 0 &&
          "Wrong data length in HeaderFileInfo deserialization");
   while (d != End) {
@@ -2077,14 +2087,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
     Module *Mod = Reader.getSubmodule(GlobalSMID);
-    FileManager &FileMgr = Reader.getFileManager();
-    ModuleMap &ModMap =
-        Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
-
-    std::string Filename = std::string(key.Filename);
-    if (key.Imported)
-      Reader.ResolveImportedPath(M, Filename);
-    if (auto FE = FileMgr.getOptionalFileRef(Filename)) {
+
+    if (FE) {
       // FIXME: NameAsWritten
       Module::Header H = {std::string(key.Filename), "", *FE};
       ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
diff --git a/clang/lib/Serialization/ASTReaderInternals.h b/clang/lib/Serialization/ASTReaderInternals.h
index 25a46ddabcb7078..1abcaa9ca299d1d 100644
--- a/clang/lib/Serialization/ASTReaderInternals.h
+++ b/clang/lib/Serialization/ASTReaderInternals.h
@@ -278,7 +278,8 @@ class HeaderFileInfoTrait {
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
 
 private:
-  const FileEntry *getFile(const internal_key_type &Key);
+  OptionalFileEntryRefDegradesToFileEntryPtr
+  getFile(const internal_key_type &Key);
 };
 
 /// The on-disk hash table used for known header files.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ab70594607530f4..a7fddaf8e243b95 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1842,7 +1842,7 @@ namespace {
 
     struct data_type {
       const HeaderFileInfo &HFI;
-      bool AlreadyIncluded;
+      std::vector<const Module *> Includers;
       ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
       UnresolvedModule Unresolved;
     };
@@ -1862,6 +1862,12 @@ namespace {
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
       unsigned DataLen = 1 + 4 + 4;
+
+      DataLen += 4;
+      for (const Module *M : Data.Includers)
+        if (!M || Writer.getLocalOrImportedSubmoduleID(M))
+          DataLen += 4;
+
       for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
@@ -1888,8 +1894,7 @@ namespace {
       endian::Writer LE(Out, llvm::endianness::little);
       uint64_t Start = Out.tell(); (void)Start;
 
-      unsigned char Flags = (Data.AlreadyIncluded << 6)
-                          | (Data.HFI.isImport << 5)
+      unsigned char Flags = (Data.HFI.isImport << 5)
                           | (Data.HFI.isPragmaOnce << 4)
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
@@ -1916,6 +1921,14 @@ namespace {
       }
       LE.write<uint32_t>(Offset);
 
+      LE.write<uint32_t>(Data.Includers.size());
+      for (const Module *M : Data.Includers) {
+        if (!M)
+          LE.write<uint32_t>(0);
+        else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M))
+          LE.write<uint32_t>(ModID);
+      }
+
       auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
         if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
           uint32_t Value = (ModID << 3) | (unsigned)Role;
@@ -1990,7 +2003,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
         HeaderFileInfoTrait::key_type Key = {
             FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
         HeaderFileInfoTrait::data_type Data = {
-            Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+            Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
         // FIXME: Deal with cases where there are multiple unresolved header
         // directives in different submodules for the same header.
         Generator.insert(Key, Data, GeneratorTrait);
@@ -2033,13 +2046,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
       SavedStrings.push_back(Filename.data());
     }
 
-    bool Included = PP->alreadyIncluded(*File);
+    std::vector<const Module *> Includers;
+    if (WritingModule) {
+      llvm::DenseSet<const Module *> Seen;
+      std::function<void(const Module *)> Visit = [&](const Module *M) {
+        if (!Seen.insert(M).second)
+          return;
+        if (M->Includes.contains(*File))
+          Includers.push_back(M);
+        for (const Module *SubM : M->submodules())
+          Visit(SubM);
+      };
+      Visit(WritingModule);
+    } else if (PP->getTopLevelIncludes().contains(*File)) {
+      Includers.push_back(nullptr);
+    }
 
     HeaderFileInfoTrait::key_type Key = {
       Filename, File->getSize(), getTimestampForOutput(*File)
     };
     HeaderFileInfoTrait::data_type Data = {
-      *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
+      *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
     };
     Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
diff --git a/clang/test/Modules/per-submodule-includes.m b/clang/test/Modules/per-submodule-includes.m
new file mode 100644
index 000000000000000..39fd255e8e93e92
--- /dev/null
+++ b/clang/test/Modules/per-submodule-includes.m
@@ -0,0 +1,53 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- frameworks/Textual.framework/Headers/Header.h
+static int symbol;
+
+//--- frameworks/FW.framework/Modules/module.modulemap
+framework module FW {
+  umbrella header "FW.h"
+  export *
+  module * { export * }
+}
+//--- frameworks/FW.framework/Headers/FW.h
+#import <FW/Sub1.h>
+#import <FW/Sub2.h>
+//--- frameworks/FW.framework/Headers/Sub1.h
+//--- frameworks/FW.framework/Headers/Sub2.h
+#import <Textual/Header.h>
+
+//--- pch.modulemap
+module __PCH {
+  header "pch.h"
+  export *
+}
+//--- pch.h
+#import <FW/Sub1.h>
+
+//--- tu.m
+#import <Textual/Header.h>
+int fn() { return symbol; }
+
+// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the
+// PCH is treated textually due to -fmodule-name=FW.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN:   -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN:   -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m
+
+// Compilation using the PCH as precompiled module fails. The import of FW/Sub1.h
+// in the PCH is translated to an import. Nothing is preventing that now that
+// -fmodule-name=FW has been replaced with -fmodule-name=__PCH.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
+// RUN:   -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o %t/pch.h.pcm
+//
+// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in
+// FW.Sub2), so the TU does not import it again. It's contents remain invisible,
+// though.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \
+// RUN:   -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only %t/tu.m



More information about the cfe-commits mailing list