[clang] [clang-tools-extra] [clang][modules] Do not resolve `HeaderFileInfo` externally in `ASTWriter` (PR #87848)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 11 14:44:27 PDT 2024


https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/87848

>From ee56548604be9473f33cd809c901886f37a3d8e9 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Fri, 5 Apr 2024 15:12:39 -0700
Subject: [PATCH 1/5] [clang][modules] Do not resolve `HeaderFileInfo`
 externally in `ASTWriter`

---
 clang/include/clang/Lex/HeaderSearch.h | 24 ++++----
 clang/lib/Lex/HeaderSearch.cpp         | 76 ++++++++++++++------------
 clang/lib/Serialization/ASTWriter.cpp  | 11 ++--
 clang/test/Modules/foo.c               | 48 ++++++++++++++++
 4 files changed, 107 insertions(+), 52 deletions(-)
 create mode 100644 clang/test/Modules/foo.c

diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 705dcfa8aacc3f..b8724e9aa3c92e 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -529,14 +529,15 @@ class HeaderSearch {
   /// Return whether the specified file is a normal header,
   /// a system header, or a C++ friendly system header.
   SrcMgr::CharacteristicKind getFileDirFlavor(FileEntryRef File) {
-    return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo;
+    if (const HeaderFileInfo *HFI = getExistingFileInfo(File))
+      return (SrcMgr::CharacteristicKind)HFI->DirInfo;
+    return (SrcMgr::CharacteristicKind)HeaderFileInfo().DirInfo;
   }
 
   /// Mark the specified file as a "once only" file due to
   /// \#pragma once.
   void MarkFileIncludeOnce(FileEntryRef File) {
-    HeaderFileInfo &FI = getFileInfo(File);
-    FI.isPragmaOnce = true;
+    getFileInfo(File).isPragmaOnce = true;
   }
 
   /// Mark the specified file as a system header, e.g. due to
@@ -816,16 +817,17 @@ class HeaderSearch {
 
   unsigned header_file_size() const { return FileInfo.size(); }
 
-  /// Return the HeaderFileInfo structure for the specified FileEntry,
-  /// in preparation for updating it in some way.
+  /// Return the HeaderFileInfo structure for the specified FileEntry, in
+  /// preparation for updating it in some way.
   HeaderFileInfo &getFileInfo(FileEntryRef FE);
 
-  /// Return the HeaderFileInfo structure for the specified FileEntry,
-  /// if it has ever been filled in.
-  /// \param WantExternal Whether the caller wants purely-external header file
-  ///        info (where \p External is true).
-  const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE,
-                                            bool WantExternal = true) const;
+  /// Return the HeaderFileInfo structure for the specified FileEntry, if it has
+  /// ever been filled in (either locally or externally).
+  const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const;
+
+  /// Return the headerFileInfo structure for the specified FileEntry, if it has
+  /// ever been filled in locally.
+  const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const;
 
   SearchDirIterator search_dir_begin() { return {*this, 0}; }
   SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; }
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcc2b56df166b8..55274f32528e65 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -947,9 +947,13 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
       // If we have no includer, that means we're processing a #include
       // from a module build. We should treat this as a system header if we're
       // building a [system] module.
-      bool IncluderIsSystemHeader =
-          Includer ? getFileInfo(*Includer).DirInfo != SrcMgr::C_User :
-          BuildSystemModule;
+      bool IncluderIsSystemHeader = [&]() {
+        if (!Includer)
+          return BuildSystemModule;
+        const HeaderFileInfo *HFI = getExistingFileInfo(*Includer);
+        assert(HFI && "includer without file info");
+        return HFI->DirInfo != SrcMgr::C_User;
+      }();
       if (OptionalFileEntryRef FE = getFileAndSuggestModule(
               TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
               RequestingModule, SuggestedModule)) {
@@ -964,10 +968,11 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
         // Note that we only use one of FromHFI/ToHFI at once, due to potential
         // reallocation of the underlying vector potentially making the first
         // reference binding dangling.
-        HeaderFileInfo &FromHFI = getFileInfo(*Includer);
-        unsigned DirInfo = FromHFI.DirInfo;
-        bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader;
-        StringRef Framework = FromHFI.Framework;
+        const HeaderFileInfo *FromHFI = getExistingFileInfo(*Includer);
+        assert(FromHFI && "includer without file info");
+        unsigned DirInfo = FromHFI->DirInfo;
+        bool IndexHeaderMapHeader = FromHFI->IndexHeaderMapHeader;
+        StringRef Framework = FromHFI->Framework;
 
         HeaderFileInfo &ToHFI = getFileInfo(*FE);
         ToHFI.DirInfo = DirInfo;
@@ -1154,10 +1159,12 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
   // "Foo" is the name of the framework in which the including header was found.
   if (!Includers.empty() && Includers.front().first && !isAngled &&
       !Filename.contains('/')) {
-    HeaderFileInfo &IncludingHFI = getFileInfo(*Includers.front().first);
-    if (IncludingHFI.IndexHeaderMapHeader) {
+    const HeaderFileInfo *IncludingHFI =
+        getExistingFileInfo(*Includers.front().first);
+    assert(IncludingHFI && "includer without file info");
+    if (IncludingHFI->IndexHeaderMapHeader) {
       SmallString<128> ScratchFilename;
-      ScratchFilename += IncludingHFI.Framework;
+      ScratchFilename += IncludingHFI->Framework;
       ScratchFilename += '/';
       ScratchFilename += Filename;
 
@@ -1289,10 +1296,11 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
   // This file is a system header or C++ unfriendly if the old file is.
   //
   // Note that the temporary 'DirInfo' is required here, as either call to
-  // getFileInfo could resize the vector and we don't want to rely on order
-  // of evaluation.
-  unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
-  getFileInfo(*File).DirInfo = DirInfo;
+  // getExistingFileInfo could resize the vector and we don't want to rely on
+  // order of evaluation.
+  const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt);
+  assert(ContextHFI && "context file without file info");
+  getFileInfo(*File).DirInfo = ContextHFI->DirInfo;
 
   FrameworkName.pop_back(); // remove the trailing '/'
   if (!findUsableModuleForFrameworkHeader(*File, FrameworkName,
@@ -1331,8 +1339,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
     HFI.Framework = OtherHFI.Framework;
 }
 
-/// getFileInfo - Return the HeaderFileInfo structure for the specified
-/// FileEntry.
 HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
   if (FE.getUID() >= FileInfo.size())
     FileInfo.resize(FE.getUID() + 1);
@@ -1349,27 +1355,20 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
   }
 
   HFI->IsValid = true;
-  // We have local information about this header file, so it's no longer
-  // strictly external.
+  // We assume the caller has local information about this header file, so it's
+  // no longer strictly external.
   HFI->External = false;
   return *HFI;
 }
 
-const HeaderFileInfo *
-HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
-  // If we have an external source, ensure we have the latest information.
-  // FIXME: Use a generation count to check whether this is really up to date.
+const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const {
   HeaderFileInfo *HFI;
   if (ExternalSource) {
-    if (FE.getUID() >= FileInfo.size()) {
-      if (!WantExternal)
-        return nullptr;
+    if (FE.getUID() >= FileInfo.size())
       FileInfo.resize(FE.getUID() + 1);
-    }
 
     HFI = &FileInfo[FE.getUID()];
-    if (!WantExternal && (!HFI->IsValid || HFI->External))
-      return nullptr;
+    // FIXME: Use a generation count to check whether this is really up to date.
     if (!HFI->Resolved) {
       auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
       if (ExternalHFI.IsValid) {
@@ -1378,16 +1377,25 @@ HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
           mergeHeaderFileInfo(*HFI, ExternalHFI);
       }
     }
-  } else if (FE.getUID() >= FileInfo.size()) {
-    return nullptr;
-  } else {
+  } else if (FE.getUID() < FileInfo.size()) {
     HFI = &FileInfo[FE.getUID()];
+  } else {
+    HFI = nullptr;
   }
 
-  if (!HFI->IsValid || (HFI->External && !WantExternal))
-    return nullptr;
+  return (HFI && HFI->IsValid) ? HFI : nullptr;
+}
+
+const HeaderFileInfo *
+HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const {
+  HeaderFileInfo *HFI;
+  if (FE.getUID() < FileInfo.size()) {
+    HFI = &FileInfo[FE.getUID()];
+  } else {
+    HFI = nullptr;
+  }
 
-  return HFI;
+  return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr;
 }
 
 bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ba6a8a5e16e4e7..a05caca1b5a6fb 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -178,8 +178,7 @@ std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
     if (!File)
       continue;
 
-    const HeaderFileInfo *HFI =
-        HS.getExistingFileInfo(*File, /*WantExternal*/ false);
+    const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
     if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
       continue;
 
@@ -2045,14 +2044,12 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
     if (!File)
       continue;
 
-    // Get the file info. This will load info from the external source if
-    // necessary. Skip emitting this file if we have no information on it
-    // as a header file (in which case HFI will be null) or if it hasn't
+    // Get the file info. Skip emitting this file if we have no information on
+    // it as a header file (in which case HFI will be null) or if it hasn't
     // changed since it was loaded. Also skip it if it's for a modular header
     // from a different module; in that case, we rely on the module(s)
     // containing the header to provide this information.
-    const HeaderFileInfo *HFI =
-        HS.getExistingFileInfo(*File, /*WantExternal*/!Chain);
+    const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
     if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
       continue;
 
diff --git a/clang/test/Modules/foo.c b/clang/test/Modules/foo.c
new file mode 100644
index 00000000000000..8f6860be1bff27
--- /dev/null
+++ b/clang/test/Modules/foo.c
@@ -0,0 +1,48 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// Not to be committed, just here as a demonstration.
+
+//--- frameworks/A.framework/Modules/module.modulemap
+framework module A {
+  header "A1.h"
+  header "A2.h"
+  header "A3.h"
+}
+//--- frameworks/A.framework/Headers/A1.h
+//--- frameworks/A.framework/Headers/A2.h
+//--- frameworks/A.framework/Headers/A3.h
+//--- EndA
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=A %t/frameworks/A.framework/Modules/module.modulemap -o %t/A.pcm
+
+//--- frameworks/B.framework/Modules/module.modulemap
+framework module B {
+  header "B1.h"
+  header "B2.h"
+  header "B3.h"
+}
+//--- frameworks/B.framework/Headers/B1.h
+//--- frameworks/B.framework/Headers/B2.h
+//--- frameworks/B.framework/Headers/B3.h
+//--- EndB
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=B %t/frameworks/B.framework/Modules/module.modulemap -o %t/B.pcm
+
+//--- frameworks/X.framework/Modules/module.modulemap
+framework module X { header "X.h" }
+//--- frameworks/X.framework/Headers/X.h
+#import <A/A1.h>
+#import <B/B1.h>
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=X %t/frameworks/X.framework/Modules/module.modulemap -o %t/X.pcm \
+// RUN:   -fmodule-map-file=%t/frameworks/A.framework/Modules/module.modulemap -fmodule-file=A=%t/A.pcm \
+// RUN:   -fmodule-map-file=%t/frameworks/B.framework/Modules/module.modulemap -fmodule-file=B=%t/B.pcm
+
+// Without this patch, ASTWriter would go looking for:
+// * "X.h" in A.pcm and B.pcm and not comparing it to any of their files due to size difference
+// * "A2.h" and compare it to "A1.h", "A2.h"                                          in A.pcm
+// * "A3.h" and compare it to "A1.h", "A2.h", "A3.h"                                  in A.pcm
+// * "B2.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h"         in B.pcm
+// * "B3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h", "B3.h" in B.pcm
+
+// With this patch, ASTWriter doesn't go looking for anything of the above.
+// * Clang already knows that "X.h" belongs to the current module and needs to be serialized,
+//   while the other headers belong to A or B and do not need to be serialized.

>From 6f564f3afb0206c58c050a4fa05f73a0618c9eec Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan at svoboda.ai>
Date: Sun, 7 Apr 2024 18:20:02 -0700
Subject: [PATCH 2/5] Fix SymbolCollector.cpp

---
 clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 85b8fc549b016e..5c4e2150cf3123 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache {
     // Framework headers are spelled as <FrameworkName/Foo.h>, not
     // "path/FrameworkName.framework/Headers/Foo.h".
     auto &HS = PP->getHeaderSearchInfo();
-    if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false))
+    if (const auto *HFI = HS.getExistingFileInfo(*FE))
       if (!HFI->Framework.empty())
         if (auto Spelling =
                 getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))

>From 3fd78a8bedd2b8326f02696af9c30fe335988b85 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 8 Apr 2024 09:14:54 -0700
Subject: [PATCH 3/5] Undo incorrect change

---
 clang/lib/Lex/HeaderSearch.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 55274f32528e65..9c3d8626cfabae 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -1294,13 +1294,12 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
   }
 
   // This file is a system header or C++ unfriendly if the old file is.
-  //
-  // Note that the temporary 'DirInfo' is required here, as either call to
-  // getExistingFileInfo could resize the vector and we don't want to rely on
-  // order of evaluation.
   const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt);
   assert(ContextHFI && "context file without file info");
-  getFileInfo(*File).DirInfo = ContextHFI->DirInfo;
+  // Note that the temporary 'DirInfo' is required here, as the call to
+  // getFileInfo could resize the vector and might invalidate 'ContextHFI'.
+  unsigned DirInfo = ContextHFI->DirInfo;
+  getFileInfo(*File).DirInfo = DirInfo;
 
   FrameworkName.pop_back(); // remove the trailing '/'
   if (!findUsableModuleForFrameworkHeader(*File, FrameworkName,

>From 223a7b4272805eb0ddb2ade081cf43294102ffab Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 11 Apr 2024 09:18:15 -0700
Subject: [PATCH 4/5] Use `getExistingLocalFileInfo()` in SymbolCollector.cpp

---
 clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 5c4e2150cf3123..969f27af6fb1d8 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache {
     // Framework headers are spelled as <FrameworkName/Foo.h>, not
     // "path/FrameworkName.framework/Headers/Foo.h".
     auto &HS = PP->getHeaderSearchInfo();
-    if (const auto *HFI = HS.getExistingFileInfo(*FE))
+    if (const auto *HFI = HS.getExistingLocalFileInfo(*FE))
       if (!HFI->Framework.empty())
         if (auto Spelling =
                 getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))

>From d06b355fded3ffcf480c580b9976feb676159866 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 11 Apr 2024 14:43:59 -0700
Subject: [PATCH 5/5] Remove foo.c test

---
 clang/test/Modules/foo.c | 48 ----------------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 clang/test/Modules/foo.c

diff --git a/clang/test/Modules/foo.c b/clang/test/Modules/foo.c
deleted file mode 100644
index 8f6860be1bff27..00000000000000
--- a/clang/test/Modules/foo.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// RUN: rm -rf %t
-// RUN: split-file %s %t
-
-// Not to be committed, just here as a demonstration.
-
-//--- frameworks/A.framework/Modules/module.modulemap
-framework module A {
-  header "A1.h"
-  header "A2.h"
-  header "A3.h"
-}
-//--- frameworks/A.framework/Headers/A1.h
-//--- frameworks/A.framework/Headers/A2.h
-//--- frameworks/A.framework/Headers/A3.h
-//--- EndA
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=A %t/frameworks/A.framework/Modules/module.modulemap -o %t/A.pcm
-
-//--- frameworks/B.framework/Modules/module.modulemap
-framework module B {
-  header "B1.h"
-  header "B2.h"
-  header "B3.h"
-}
-//--- frameworks/B.framework/Headers/B1.h
-//--- frameworks/B.framework/Headers/B2.h
-//--- frameworks/B.framework/Headers/B3.h
-//--- EndB
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=B %t/frameworks/B.framework/Modules/module.modulemap -o %t/B.pcm
-
-//--- frameworks/X.framework/Modules/module.modulemap
-framework module X { header "X.h" }
-//--- frameworks/X.framework/Headers/X.h
-#import <A/A1.h>
-#import <B/B1.h>
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=X %t/frameworks/X.framework/Modules/module.modulemap -o %t/X.pcm \
-// RUN:   -fmodule-map-file=%t/frameworks/A.framework/Modules/module.modulemap -fmodule-file=A=%t/A.pcm \
-// RUN:   -fmodule-map-file=%t/frameworks/B.framework/Modules/module.modulemap -fmodule-file=B=%t/B.pcm
-
-// Without this patch, ASTWriter would go looking for:
-// * "X.h" in A.pcm and B.pcm and not comparing it to any of their files due to size difference
-// * "A2.h" and compare it to "A1.h", "A2.h"                                          in A.pcm
-// * "A3.h" and compare it to "A1.h", "A2.h", "A3.h"                                  in A.pcm
-// * "B2.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h"         in B.pcm
-// * "B3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h", "B3.h" in B.pcm
-
-// With this patch, ASTWriter doesn't go looking for anything of the above.
-// * Clang already knows that "X.h" belongs to the current module and needs to be serialized,
-//   while the other headers belong to A or B and do not need to be serialized.



More information about the cfe-commits mailing list