[clang] [clang-tools-extra] [clang][modules] Do not resolve `HeaderFileInfo` externally in `ASTWriter` (PR #87848)
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 11 14:44:27 PDT 2024
https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/87848
>From ee56548604be9473f33cd809c901886f37a3d8e9 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Fri, 5 Apr 2024 15:12:39 -0700
Subject: [PATCH 1/5] [clang][modules] Do not resolve `HeaderFileInfo`
externally in `ASTWriter`
---
clang/include/clang/Lex/HeaderSearch.h | 24 ++++----
clang/lib/Lex/HeaderSearch.cpp | 76 ++++++++++++++------------
clang/lib/Serialization/ASTWriter.cpp | 11 ++--
clang/test/Modules/foo.c | 48 ++++++++++++++++
4 files changed, 107 insertions(+), 52 deletions(-)
create mode 100644 clang/test/Modules/foo.c
diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 705dcfa8aacc3f..b8724e9aa3c92e 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -529,14 +529,15 @@ class HeaderSearch {
/// Return whether the specified file is a normal header,
/// a system header, or a C++ friendly system header.
SrcMgr::CharacteristicKind getFileDirFlavor(FileEntryRef File) {
- return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo;
+ if (const HeaderFileInfo *HFI = getExistingFileInfo(File))
+ return (SrcMgr::CharacteristicKind)HFI->DirInfo;
+ return (SrcMgr::CharacteristicKind)HeaderFileInfo().DirInfo;
}
/// Mark the specified file as a "once only" file due to
/// \#pragma once.
void MarkFileIncludeOnce(FileEntryRef File) {
- HeaderFileInfo &FI = getFileInfo(File);
- FI.isPragmaOnce = true;
+ getFileInfo(File).isPragmaOnce = true;
}
/// Mark the specified file as a system header, e.g. due to
@@ -816,16 +817,17 @@ class HeaderSearch {
unsigned header_file_size() const { return FileInfo.size(); }
- /// Return the HeaderFileInfo structure for the specified FileEntry,
- /// in preparation for updating it in some way.
+ /// Return the HeaderFileInfo structure for the specified FileEntry, in
+ /// preparation for updating it in some way.
HeaderFileInfo &getFileInfo(FileEntryRef FE);
- /// Return the HeaderFileInfo structure for the specified FileEntry,
- /// if it has ever been filled in.
- /// \param WantExternal Whether the caller wants purely-external header file
- /// info (where \p External is true).
- const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE,
- bool WantExternal = true) const;
+ /// Return the HeaderFileInfo structure for the specified FileEntry, if it has
+ /// ever been filled in (either locally or externally).
+ const HeaderFileInfo *getExistingFileInfo(FileEntryRef FE) const;
+
+ /// Return the headerFileInfo structure for the specified FileEntry, if it has
+ /// ever been filled in locally.
+ const HeaderFileInfo *getExistingLocalFileInfo(FileEntryRef FE) const;
SearchDirIterator search_dir_begin() { return {*this, 0}; }
SearchDirIterator search_dir_end() { return {*this, SearchDirs.size()}; }
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcc2b56df166b8..55274f32528e65 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -947,9 +947,13 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
// If we have no includer, that means we're processing a #include
// from a module build. We should treat this as a system header if we're
// building a [system] module.
- bool IncluderIsSystemHeader =
- Includer ? getFileInfo(*Includer).DirInfo != SrcMgr::C_User :
- BuildSystemModule;
+ bool IncluderIsSystemHeader = [&]() {
+ if (!Includer)
+ return BuildSystemModule;
+ const HeaderFileInfo *HFI = getExistingFileInfo(*Includer);
+ assert(HFI && "includer without file info");
+ return HFI->DirInfo != SrcMgr::C_User;
+ }();
if (OptionalFileEntryRef FE = getFileAndSuggestModule(
TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
RequestingModule, SuggestedModule)) {
@@ -964,10 +968,11 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
// Note that we only use one of FromHFI/ToHFI at once, due to potential
// reallocation of the underlying vector potentially making the first
// reference binding dangling.
- HeaderFileInfo &FromHFI = getFileInfo(*Includer);
- unsigned DirInfo = FromHFI.DirInfo;
- bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader;
- StringRef Framework = FromHFI.Framework;
+ const HeaderFileInfo *FromHFI = getExistingFileInfo(*Includer);
+ assert(FromHFI && "includer without file info");
+ unsigned DirInfo = FromHFI->DirInfo;
+ bool IndexHeaderMapHeader = FromHFI->IndexHeaderMapHeader;
+ StringRef Framework = FromHFI->Framework;
HeaderFileInfo &ToHFI = getFileInfo(*FE);
ToHFI.DirInfo = DirInfo;
@@ -1154,10 +1159,12 @@ OptionalFileEntryRef HeaderSearch::LookupFile(
// "Foo" is the name of the framework in which the including header was found.
if (!Includers.empty() && Includers.front().first && !isAngled &&
!Filename.contains('/')) {
- HeaderFileInfo &IncludingHFI = getFileInfo(*Includers.front().first);
- if (IncludingHFI.IndexHeaderMapHeader) {
+ const HeaderFileInfo *IncludingHFI =
+ getExistingFileInfo(*Includers.front().first);
+ assert(IncludingHFI && "includer without file info");
+ if (IncludingHFI->IndexHeaderMapHeader) {
SmallString<128> ScratchFilename;
- ScratchFilename += IncludingHFI.Framework;
+ ScratchFilename += IncludingHFI->Framework;
ScratchFilename += '/';
ScratchFilename += Filename;
@@ -1289,10 +1296,11 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
// This file is a system header or C++ unfriendly if the old file is.
//
// Note that the temporary 'DirInfo' is required here, as either call to
- // getFileInfo could resize the vector and we don't want to rely on order
- // of evaluation.
- unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
- getFileInfo(*File).DirInfo = DirInfo;
+ // getExistingFileInfo could resize the vector and we don't want to rely on
+ // order of evaluation.
+ const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt);
+ assert(ContextHFI && "context file without file info");
+ getFileInfo(*File).DirInfo = ContextHFI->DirInfo;
FrameworkName.pop_back(); // remove the trailing '/'
if (!findUsableModuleForFrameworkHeader(*File, FrameworkName,
@@ -1331,8 +1339,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
HFI.Framework = OtherHFI.Framework;
}
-/// getFileInfo - Return the HeaderFileInfo structure for the specified
-/// FileEntry.
HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
if (FE.getUID() >= FileInfo.size())
FileInfo.resize(FE.getUID() + 1);
@@ -1349,27 +1355,20 @@ HeaderFileInfo &HeaderSearch::getFileInfo(FileEntryRef FE) {
}
HFI->IsValid = true;
- // We have local information about this header file, so it's no longer
- // strictly external.
+ // We assume the caller has local information about this header file, so it's
+ // no longer strictly external.
HFI->External = false;
return *HFI;
}
-const HeaderFileInfo *
-HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
- // If we have an external source, ensure we have the latest information.
- // FIXME: Use a generation count to check whether this is really up to date.
+const HeaderFileInfo *HeaderSearch::getExistingFileInfo(FileEntryRef FE) const {
HeaderFileInfo *HFI;
if (ExternalSource) {
- if (FE.getUID() >= FileInfo.size()) {
- if (!WantExternal)
- return nullptr;
+ if (FE.getUID() >= FileInfo.size())
FileInfo.resize(FE.getUID() + 1);
- }
HFI = &FileInfo[FE.getUID()];
- if (!WantExternal && (!HFI->IsValid || HFI->External))
- return nullptr;
+ // FIXME: Use a generation count to check whether this is really up to date.
if (!HFI->Resolved) {
auto ExternalHFI = ExternalSource->GetHeaderFileInfo(FE);
if (ExternalHFI.IsValid) {
@@ -1378,16 +1377,25 @@ HeaderSearch::getExistingFileInfo(FileEntryRef FE, bool WantExternal) const {
mergeHeaderFileInfo(*HFI, ExternalHFI);
}
}
- } else if (FE.getUID() >= FileInfo.size()) {
- return nullptr;
- } else {
+ } else if (FE.getUID() < FileInfo.size()) {
HFI = &FileInfo[FE.getUID()];
+ } else {
+ HFI = nullptr;
}
- if (!HFI->IsValid || (HFI->External && !WantExternal))
- return nullptr;
+ return (HFI && HFI->IsValid) ? HFI : nullptr;
+}
+
+const HeaderFileInfo *
+HeaderSearch::getExistingLocalFileInfo(FileEntryRef FE) const {
+ HeaderFileInfo *HFI;
+ if (FE.getUID() < FileInfo.size()) {
+ HFI = &FileInfo[FE.getUID()];
+ } else {
+ HFI = nullptr;
+ }
- return HFI;
+ return (HFI && HFI->IsValid && !HFI->External) ? HFI : nullptr;
}
bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ba6a8a5e16e4e7..a05caca1b5a6fb 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -178,8 +178,7 @@ std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
if (!File)
continue;
- const HeaderFileInfo *HFI =
- HS.getExistingFileInfo(*File, /*WantExternal*/ false);
+ const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
continue;
@@ -2045,14 +2044,12 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
if (!File)
continue;
- // Get the file info. This will load info from the external source if
- // necessary. Skip emitting this file if we have no information on it
- // as a header file (in which case HFI will be null) or if it hasn't
+ // Get the file info. Skip emitting this file if we have no information on
+ // it as a header file (in which case HFI will be null) or if it hasn't
// changed since it was loaded. Also skip it if it's for a modular header
// from a different module; in that case, we rely on the module(s)
// containing the header to provide this information.
- const HeaderFileInfo *HFI =
- HS.getExistingFileInfo(*File, /*WantExternal*/!Chain);
+ const HeaderFileInfo *HFI = HS.getExistingLocalFileInfo(*File);
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
continue;
diff --git a/clang/test/Modules/foo.c b/clang/test/Modules/foo.c
new file mode 100644
index 00000000000000..8f6860be1bff27
--- /dev/null
+++ b/clang/test/Modules/foo.c
@@ -0,0 +1,48 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// Not to be committed, just here as a demonstration.
+
+//--- frameworks/A.framework/Modules/module.modulemap
+framework module A {
+ header "A1.h"
+ header "A2.h"
+ header "A3.h"
+}
+//--- frameworks/A.framework/Headers/A1.h
+//--- frameworks/A.framework/Headers/A2.h
+//--- frameworks/A.framework/Headers/A3.h
+//--- EndA
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=A %t/frameworks/A.framework/Modules/module.modulemap -o %t/A.pcm
+
+//--- frameworks/B.framework/Modules/module.modulemap
+framework module B {
+ header "B1.h"
+ header "B2.h"
+ header "B3.h"
+}
+//--- frameworks/B.framework/Headers/B1.h
+//--- frameworks/B.framework/Headers/B2.h
+//--- frameworks/B.framework/Headers/B3.h
+//--- EndB
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=B %t/frameworks/B.framework/Modules/module.modulemap -o %t/B.pcm
+
+//--- frameworks/X.framework/Modules/module.modulemap
+framework module X { header "X.h" }
+//--- frameworks/X.framework/Headers/X.h
+#import <A/A1.h>
+#import <B/B1.h>
+// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=X %t/frameworks/X.framework/Modules/module.modulemap -o %t/X.pcm \
+// RUN: -fmodule-map-file=%t/frameworks/A.framework/Modules/module.modulemap -fmodule-file=A=%t/A.pcm \
+// RUN: -fmodule-map-file=%t/frameworks/B.framework/Modules/module.modulemap -fmodule-file=B=%t/B.pcm
+
+// Without this patch, ASTWriter would go looking for:
+// * "X.h" in A.pcm and B.pcm and not comparing it to any of their files due to size difference
+// * "A2.h" and compare it to "A1.h", "A2.h" in A.pcm
+// * "A3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm
+// * "B2.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h" in B.pcm
+// * "B3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h", "B3.h" in B.pcm
+
+// With this patch, ASTWriter doesn't go looking for anything of the above.
+// * Clang already knows that "X.h" belongs to the current module and needs to be serialized,
+// while the other headers belong to A or B and do not need to be serialized.
>From 6f564f3afb0206c58c050a4fa05f73a0618c9eec Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan at svoboda.ai>
Date: Sun, 7 Apr 2024 18:20:02 -0700
Subject: [PATCH 2/5] Fix SymbolCollector.cpp
---
clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 85b8fc549b016e..5c4e2150cf3123 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache {
// Framework headers are spelled as <FrameworkName/Foo.h>, not
// "path/FrameworkName.framework/Headers/Foo.h".
auto &HS = PP->getHeaderSearchInfo();
- if (const auto *HFI = HS.getExistingFileInfo(*FE, /*WantExternal*/ false))
+ if (const auto *HFI = HS.getExistingFileInfo(*FE))
if (!HFI->Framework.empty())
if (auto Spelling =
getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))
>From 3fd78a8bedd2b8326f02696af9c30fe335988b85 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Mon, 8 Apr 2024 09:14:54 -0700
Subject: [PATCH 3/5] Undo incorrect change
---
clang/lib/Lex/HeaderSearch.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 55274f32528e65..9c3d8626cfabae 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -1294,13 +1294,12 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader(
}
// This file is a system header or C++ unfriendly if the old file is.
- //
- // Note that the temporary 'DirInfo' is required here, as either call to
- // getExistingFileInfo could resize the vector and we don't want to rely on
- // order of evaluation.
const HeaderFileInfo *ContextHFI = getExistingFileInfo(ContextFileEnt);
assert(ContextHFI && "context file without file info");
- getFileInfo(*File).DirInfo = ContextHFI->DirInfo;
+ // Note that the temporary 'DirInfo' is required here, as the call to
+ // getFileInfo could resize the vector and might invalidate 'ContextHFI'.
+ unsigned DirInfo = ContextHFI->DirInfo;
+ getFileInfo(*File).DirInfo = DirInfo;
FrameworkName.pop_back(); // remove the trailing '/'
if (!findUsableModuleForFrameworkHeader(*File, FrameworkName,
>From 223a7b4272805eb0ddb2ade081cf43294102ffab Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 11 Apr 2024 09:18:15 -0700
Subject: [PATCH 4/5] Use `getExistingLocalFileInfo()` in SymbolCollector.cpp
---
clang-tools-extra/clangd/index/SymbolCollector.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 5c4e2150cf3123..969f27af6fb1d8 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -409,7 +409,7 @@ class SymbolCollector::HeaderFileURICache {
// Framework headers are spelled as <FrameworkName/Foo.h>, not
// "path/FrameworkName.framework/Headers/Foo.h".
auto &HS = PP->getHeaderSearchInfo();
- if (const auto *HFI = HS.getExistingFileInfo(*FE))
+ if (const auto *HFI = HS.getExistingLocalFileInfo(*FE))
if (!HFI->Framework.empty())
if (auto Spelling =
getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))
>From d06b355fded3ffcf480c580b9976feb676159866 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Thu, 11 Apr 2024 14:43:59 -0700
Subject: [PATCH 5/5] Remove foo.c test
---
clang/test/Modules/foo.c | 48 ----------------------------------------
1 file changed, 48 deletions(-)
delete mode 100644 clang/test/Modules/foo.c
diff --git a/clang/test/Modules/foo.c b/clang/test/Modules/foo.c
deleted file mode 100644
index 8f6860be1bff27..00000000000000
--- a/clang/test/Modules/foo.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// RUN: rm -rf %t
-// RUN: split-file %s %t
-
-// Not to be committed, just here as a demonstration.
-
-//--- frameworks/A.framework/Modules/module.modulemap
-framework module A {
- header "A1.h"
- header "A2.h"
- header "A3.h"
-}
-//--- frameworks/A.framework/Headers/A1.h
-//--- frameworks/A.framework/Headers/A2.h
-//--- frameworks/A.framework/Headers/A3.h
-//--- EndA
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=A %t/frameworks/A.framework/Modules/module.modulemap -o %t/A.pcm
-
-//--- frameworks/B.framework/Modules/module.modulemap
-framework module B {
- header "B1.h"
- header "B2.h"
- header "B3.h"
-}
-//--- frameworks/B.framework/Headers/B1.h
-//--- frameworks/B.framework/Headers/B2.h
-//--- frameworks/B.framework/Headers/B3.h
-//--- EndB
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=B %t/frameworks/B.framework/Modules/module.modulemap -o %t/B.pcm
-
-//--- frameworks/X.framework/Modules/module.modulemap
-framework module X { header "X.h" }
-//--- frameworks/X.framework/Headers/X.h
-#import <A/A1.h>
-#import <B/B1.h>
-// RUN: %clang_cc1 -fmodules -F %t/frameworks -emit-module -fmodule-name=X %t/frameworks/X.framework/Modules/module.modulemap -o %t/X.pcm \
-// RUN: -fmodule-map-file=%t/frameworks/A.framework/Modules/module.modulemap -fmodule-file=A=%t/A.pcm \
-// RUN: -fmodule-map-file=%t/frameworks/B.framework/Modules/module.modulemap -fmodule-file=B=%t/B.pcm
-
-// Without this patch, ASTWriter would go looking for:
-// * "X.h" in A.pcm and B.pcm and not comparing it to any of their files due to size difference
-// * "A2.h" and compare it to "A1.h", "A2.h" in A.pcm
-// * "A3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm
-// * "B2.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h" in B.pcm
-// * "B3.h" and compare it to "A1.h", "A2.h", "A3.h" in A.pcm; "B1.h", "B2.h", "B3.h" in B.pcm
-
-// With this patch, ASTWriter doesn't go looking for anything of the above.
-// * Clang already knows that "X.h" belongs to the current module and needs to be serialized,
-// while the other headers belong to A or B and do not need to be serialized.
More information about the cfe-commits
mailing list