[clang] f720272 - [clang][lex] Include tracking: simplify and move to preprocessor
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 26 06:56:31 PST 2022
Author: Jan Svoboda
Date: 2022-01-26T15:56:26+01:00
New Revision: f7202723304461c4f94399b906333d6ede85579a
URL: https://github.com/llvm/llvm-project/commit/f7202723304461c4f94399b906333d6ede85579a
DIFF: https://github.com/llvm/llvm-project/commit/f7202723304461c4f94399b906333d6ede85579a.diff
LOG: [clang][lex] Include tracking: simplify and move to preprocessor
This patch replaces the exact include count of each file in `HeaderFileInfo` with a set of included files in `Preprocessor`.
The number of includes isn't a property of a header file but rather a preprocessor state. The exact number of includes is not used anywhere except for statistics tracking.
Reviewed By: vsapsai
Differential Revision: https://reviews.llvm.org/D114095
Added:
Modified:
clang/include/clang/Lex/HeaderSearch.h
clang/include/clang/Lex/Preprocessor.h
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTWriter.h
clang/lib/Lex/HeaderSearch.cpp
clang/lib/Lex/PPDirectives.cpp
clang/lib/Lex/Preprocessor.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 9b9d28433c080..74768717470bf 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -57,6 +57,8 @@ class TargetInfo;
/// The preprocessor keeps track of this information for each
/// file that is \#included.
struct HeaderFileInfo {
+ // TODO: Whether the file was imported is not a property of the file itself.
+ // It's a preprocessor state, move it there.
/// True if this is a \#import'd file.
unsigned isImport : 1;
@@ -95,9 +97,6 @@ struct HeaderFileInfo {
/// Whether this file has been looked up as a header.
unsigned IsValid : 1;
- /// The number of times the file has been included already.
- unsigned short NumIncludes = 0;
-
/// The ID number of the controlling macro.
///
/// This ID number will be non-zero when there is a controlling
@@ -469,12 +468,6 @@ class HeaderSearch {
ModuleMap::ModuleHeaderRole Role,
bool isCompilingModuleHeader);
- /// Increment the count for the number of times the specified
- /// FileEntry has been entered.
- void IncrementIncludeCount(const FileEntry *File) {
- ++getFileInfo(File).NumIncludes;
- }
-
/// Mark the specified file as having a controlling macro.
///
/// This is used by the multiple-include optimization to eliminate
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index c62bf0c4ceb6f..e567f6391531d 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -450,6 +450,8 @@ class Preprocessor {
ElseLoc(ElseLoc) {}
};
+ using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
+
private:
friend class ASTReader;
friend class MacroArgs;
@@ -765,6 +767,9 @@ class Preprocessor {
/// in a submodule.
SubmoduleState *CurSubmoduleState;
+ /// The files that have been included.
+ IncludedFilesSet IncludedFiles;
+
/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;
@@ -1224,6 +1229,22 @@ class Preprocessor {
/// \}
+ /// Mark the file as included.
+ /// Returns true if this is the first time the file was included.
+ bool markIncluded(const FileEntry *File) {
+ HeaderInfo.getFileInfo(File);
+ return IncludedFiles.insert(File).second;
+ }
+
+ /// Return true if this header has already been included.
+ bool alreadyIncluded(const FileEntry *File) const {
+ return IncludedFiles.count(File);
+ }
+
+ /// Get the set of included files.
+ IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
+ const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+
/// Return the name of the macro defined before \p Loc that has
/// spelling \p Tokens. If there are multiple macros with same spelling,
/// return the last one defined.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 341da5bd1d62e..f98e173b158c1 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -695,6 +695,9 @@ enum ASTRecordTypes {
/// Record code for \#pragma float_control options.
FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
+
+ /// Record code for included files.
+ PP_INCLUDED_FILES = 66,
};
/// Record types used within a source manager block.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index a36c8ba20a10a..d46a6c4500f4e 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1329,6 +1329,7 @@ class ASTReader
llvm::Error ReadSourceManagerBlock(ModuleFile &F);
llvm::BitstreamCursor &SLocCursorForID(int ID);
SourceLocation getImportLocation(ModuleFile *F);
+ void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities);
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 27a8770d7f267..e455e4d4d96a5 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -465,6 +465,7 @@ class ASTWriter : public ASTDeserializationListener,
std::set<const FileEntry *> &AffectingModuleMaps);
void WriteSourceManagerBlock(SourceManager &SourceMgr,
const Preprocessor &PP);
+ void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
void WritePreprocessor(const Preprocessor &PP, bool IsModule);
void WriteHeaderSearch(const HeaderSearch &HS);
void WritePreprocessorDetail(PreprocessingRecord &PPRec,
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcd759af6d5ea..39c125c395ef8 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -90,16 +90,10 @@ HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
void HeaderSearch::PrintStats() {
llvm::errs() << "\n*** HeaderSearch Stats:\n"
<< FileInfo.size() << " files tracked.\n";
- unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
- for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+ unsigned NumOnceOnlyFiles = 0;
+ for (unsigned i = 0, e = FileInfo.size(); i != e; ++i)
NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport);
- if (MaxNumIncludes < FileInfo[i].NumIncludes)
- MaxNumIncludes = FileInfo[i].NumIncludes;
- NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
- }
- llvm::errs() << " " << NumOnceOnlyFiles << " #import/#pragma once files.\n"
- << " " << NumSingleIncludedFiles << " included exactly once.\n"
- << " " << MaxNumIncludes << " max times a file is included.\n";
+ llvm::errs() << " " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
llvm::errs() << " " << NumIncluded << " #include/#include_next/#import.\n"
<< " " << NumMultiIncludeFileOptzn
@@ -1243,7 +1237,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
HFI.isImport |= OtherHFI.isImport;
HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
HFI.isModuleHeader |= OtherHFI.isModuleHeader;
- HFI.NumIncludes += OtherHFI.NumIncludes;
if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
HFI.ControllingMacro = OtherHFI.ControllingMacro;
@@ -1404,7 +1397,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
FileInfo.isImport = true;
// Has this already been #import'ed or #include'd?
- if (FileInfo.NumIncludes && !TryEnterImported())
+ if (PP.alreadyIncluded(File) && !TryEnterImported())
return false;
} else {
// Otherwise, if this is a #include of a file that was previously #import'd
@@ -1427,10 +1420,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
}
}
- // Increment the number of times this file has been included.
- ++FileInfo.NumIncludes;
-
- IsFirstIncludeOfFile = FileInfo.NumIncludes == 1;
+ IsFirstIncludeOfFile = PP.markIncluded(File);
return true;
}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 29fc8b3aa7a0c..f3aefdd22b514 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2058,7 +2058,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
// include cycle. Don't enter already processed files again as it can lead to
// reaching the max allowed include depth again.
if (Action == Enter && HasReachedMaxIncludeDepth && File &&
- HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
+ alreadyIncluded(*File))
Action = IncludeLimitReached;
// Determine whether we should try to import the module for this #include, if
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 3eea0be7b762c..3c338a2b81235 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -549,7 +549,7 @@ void Preprocessor::EnterMainSourceFile() {
// Tell the header info that the main file was entered. If the file is later
// #imported, it won't be re-entered.
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
- HeaderInfo.IncrementIncludeCount(FE);
+ markIncluded(FE);
}
// Preprocess Predefines to populate the initial preprocessor state.
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9056f00978c8f..d806fb9e19494 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1887,10 +1887,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
HFI.DirInfo = (Flags >> 1) & 0x07;
HFI.IndexHeaderMapHeader = Flags & 0x01;
- // FIXME: Find a better way to handle this. Maybe just store a
- // "has been included" flag?
- HFI.NumIncludes = std::max(endian::readNext<uint16_t, little, unaligned>(d),
- HFI.NumIncludes);
HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
M, endian::readNext<uint32_t, little, unaligned>(d));
if (unsigned FrameworkOffset =
@@ -2962,6 +2958,22 @@ ASTReader::ReadControlBlock(ModuleFile &F,
}
}
+void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
+ Preprocessor &PP) {
+ using namespace llvm::support;
+
+ const unsigned char *D = (const unsigned char *)Blob.data();
+ unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
+
+ for (unsigned I = 0; I < FileCount; ++I) {
+ size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
+ InputFileInfo IFI = readInputFileInfo(F, ID);
+ if (llvm::ErrorOr<const FileEntry *> File =
+ PP.getFileManager().getFile(IFI.Filename))
+ PP.getIncludedFiles().insert(*File);
+ }
+}
+
llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
@@ -3700,6 +3712,10 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
}
+ case PP_INCLUDED_FILES:
+ readIncludedFiles(F, Blob, PP);
+ break;
+
case LATE_PARSED_TEMPLATE:
LateParsedTemplates.emplace_back(
std::piecewise_construct, std::forward_as_tuple(&F),
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c2bee93b077e6..763fc9537c04b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -862,6 +862,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
RECORD(PP_CONDITIONAL_STACK);
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
+ RECORD(PP_INCLUDED_FILES);
// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
@@ -1773,7 +1774,7 @@ namespace {
std::pair<unsigned, unsigned>
EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
- unsigned DataLen = 1 + 2 + 4 + 4;
+ unsigned DataLen = 1 + 4 + 4;
for (auto ModInfo : Data.KnownHeaders)
if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
DataLen += 4;
@@ -1805,7 +1806,6 @@ namespace {
| (Data.HFI.DirInfo << 1)
| Data.HFI.IndexHeaderMapHeader;
LE.write<uint8_t>(Flags);
- LE.write<uint16_t>(Data.HFI.NumIncludes);
if (!Data.HFI.ControllingMacro)
LE.write<uint32_t>(Data.HFI.ControllingMacroID);
@@ -2254,6 +2254,29 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
return false;
}
+void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
+ using namespace llvm::support;
+
+ const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
+
+ std::vector<uint32_t> IncludedInputFileIDs;
+ IncludedInputFileIDs.reserve(IncludedFiles.size());
+
+ for (const FileEntry *File : IncludedFiles) {
+ auto InputFileIt = InputFileIDs.find(File);
+ if (InputFileIt == InputFileIDs.end())
+ continue;
+ IncludedInputFileIDs.push_back(InputFileIt->second);
+ }
+
+ llvm::sort(IncludedInputFileIDs);
+
+ endian::Writer LE(Out, little);
+ LE.write<uint32_t>(IncludedInputFileIDs.size());
+ for (uint32_t ID : IncludedInputFileIDs)
+ LE.write<uint32_t>(ID);
+}
+
/// Writes the block containing the serialized form of the
/// preprocessor.
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2462,6 +2485,20 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
MacroOffsetsBase - ASTBlockStartOffset};
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
}
+
+ {
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+ unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+ SmallString<2048> Buffer;
+ raw_svector_ostream Out(Buffer);
+ writeIncludedFiles(Out, PP);
+ RecordData::value_type Record[] = {PP_INCLUDED_FILES};
+ Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
+ Buffer.size());
+ }
}
void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,
More information about the cfe-commits mailing list