[clang] f720272 - [clang][lex] Include tracking: simplify and move to preprocessor

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 26 06:56:31 PST 2022


Author: Jan Svoboda
Date: 2022-01-26T15:56:26+01:00
New Revision: f7202723304461c4f94399b906333d6ede85579a

URL: https://github.com/llvm/llvm-project/commit/f7202723304461c4f94399b906333d6ede85579a
DIFF: https://github.com/llvm/llvm-project/commit/f7202723304461c4f94399b906333d6ede85579a.diff

LOG: [clang][lex] Include tracking: simplify and move to preprocessor

This patch replaces the exact include count of each file in `HeaderFileInfo` with a set of included files in `Preprocessor`.

The number of includes isn't a property of a header file but rather part of the preprocessor state. The exact number of includes is not used anywhere except for statistics tracking.

Reviewed By: vsapsai

Differential Revision: https://reviews.llvm.org/D114095

Added: 
    

Modified: 
    clang/include/clang/Lex/HeaderSearch.h
    clang/include/clang/Lex/Preprocessor.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/include/clang/Serialization/ASTReader.h
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Lex/HeaderSearch.cpp
    clang/lib/Lex/PPDirectives.cpp
    clang/lib/Lex/Preprocessor.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 9b9d28433c080..74768717470bf 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -57,6 +57,8 @@ class TargetInfo;
 /// The preprocessor keeps track of this information for each
 /// file that is \#included.
 struct HeaderFileInfo {
+  // TODO: Whether the file was imported is not a property of the file itself.
+  // It's a preprocessor state, move it there.
   /// True if this is a \#import'd file.
   unsigned isImport : 1;
 
@@ -95,9 +97,6 @@ struct HeaderFileInfo {
   /// Whether this file has been looked up as a header.
   unsigned IsValid : 1;
 
-  /// The number of times the file has been included already.
-  unsigned short NumIncludes = 0;
-
   /// The ID number of the controlling macro.
   ///
   /// This ID number will be non-zero when there is a controlling
@@ -469,12 +468,6 @@ class HeaderSearch {
                             ModuleMap::ModuleHeaderRole Role,
                             bool isCompilingModuleHeader);
 
-  /// Increment the count for the number of times the specified
-  /// FileEntry has been entered.
-  void IncrementIncludeCount(const FileEntry *File) {
-    ++getFileInfo(File).NumIncludes;
-  }
-
   /// Mark the specified file as having a controlling macro.
   ///
   /// This is used by the multiple-include optimization to eliminate

diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index c62bf0c4ceb6f..e567f6391531d 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -450,6 +450,8 @@ class Preprocessor {
           ElseLoc(ElseLoc) {}
   };
 
+  using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
+
 private:
   friend class ASTReader;
   friend class MacroArgs;
@@ -765,6 +767,9 @@ class Preprocessor {
   /// in a submodule.
   SubmoduleState *CurSubmoduleState;
 
+  /// The files that have been included.
+  IncludedFilesSet IncludedFiles;
+
   /// The set of known macros exported from modules.
   llvm::FoldingSet<ModuleMacro> ModuleMacros;
 
@@ -1224,6 +1229,22 @@ class Preprocessor {
 
   /// \}
 
+  /// Mark the file as included.
+  /// Returns true if this is the first time the file was included.
+  bool markIncluded(const FileEntry *File) {
+    HeaderInfo.getFileInfo(File);
+    return IncludedFiles.insert(File).second;
+  }
+
+  /// Return true if this header has already been included.
+  bool alreadyIncluded(const FileEntry *File) const {
+    return IncludedFiles.count(File);
+  }
+
+  /// Get the set of included files.
+  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
+  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+
   /// Return the name of the macro defined before \p Loc that has
   /// spelling \p Tokens.  If there are multiple macros with same spelling,
   /// return the last one defined.

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 341da5bd1d62e..f98e173b158c1 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -695,6 +695,9 @@ enum ASTRecordTypes {
 
   /// Record code for \#pragma float_control options.
   FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
+
+  /// Record code for included files.
+  PP_INCLUDED_FILES = 66,
 };
 
 /// Record types used within a source manager block.

diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index a36c8ba20a10a..d46a6c4500f4e 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1329,6 +1329,7 @@ class ASTReader
   llvm::Error ReadSourceManagerBlock(ModuleFile &F);
   llvm::BitstreamCursor &SLocCursorForID(int ID);
   SourceLocation getImportLocation(ModuleFile *F);
+  void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
   ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
                                        const ModuleFile *ImportedBy,
                                        unsigned ClientLoadCapabilities);

diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 27a8770d7f267..e455e4d4d96a5 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -465,6 +465,7 @@ class ASTWriter : public ASTDeserializationListener,
                        std::set<const FileEntry *> &AffectingModuleMaps);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,
                                const Preprocessor &PP);
+  void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
   void WritePreprocessor(const Preprocessor &PP, bool IsModule);
   void WriteHeaderSearch(const HeaderSearch &HS);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec,

diff  --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcd759af6d5ea..39c125c395ef8 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -90,16 +90,10 @@ HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
 void HeaderSearch::PrintStats() {
   llvm::errs() << "\n*** HeaderSearch Stats:\n"
                << FileInfo.size() << " files tracked.\n";
-  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
-  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+  unsigned NumOnceOnlyFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i)
     NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport);
-    if (MaxNumIncludes < FileInfo[i].NumIncludes)
-      MaxNumIncludes = FileInfo[i].NumIncludes;
-    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
-  }
-  llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n"
-               << "  " << NumSingleIncludedFiles << " included exactly once.\n"
-               << "  " << MaxNumIncludes << " max times a file is included.\n";
+  llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
 
   llvm::errs() << "  " << NumIncluded << " #include/#include_next/#import.\n"
                << "    " << NumMultiIncludeFileOptzn
@@ -1243,7 +1237,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
   HFI.isImport |= OtherHFI.isImport;
   HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
   HFI.isModuleHeader |= OtherHFI.isModuleHeader;
-  HFI.NumIncludes += OtherHFI.NumIncludes;
 
   if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
     HFI.ControllingMacro = OtherHFI.ControllingMacro;
@@ -1404,7 +1397,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
     FileInfo.isImport = true;
 
     // Has this already been #import'ed or #include'd?
-    if (FileInfo.NumIncludes && !TryEnterImported())
+    if (PP.alreadyIncluded(File) && !TryEnterImported())
       return false;
   } else {
     // Otherwise, if this is a #include of a file that was previously #import'd
@@ -1427,10 +1420,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
     }
   }
 
-  // Increment the number of times this file has been included.
-  ++FileInfo.NumIncludes;
-
-  IsFirstIncludeOfFile = FileInfo.NumIncludes == 1;
+  IsFirstIncludeOfFile = PP.markIncluded(File);
 
   return true;
 }

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 29fc8b3aa7a0c..f3aefdd22b514 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2058,7 +2058,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // include cycle. Don't enter already processed files again as it can lead to
   // reaching the max allowed include depth again.
   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
-      HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
+      alreadyIncluded(*File))
     Action = IncludeLimitReached;
 
   // Determine whether we should try to import the module for this #include, if

diff  --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 3eea0be7b762c..3c338a2b81235 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -549,7 +549,7 @@ void Preprocessor::EnterMainSourceFile() {
     // Tell the header info that the main file was entered.  If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
-      HeaderInfo.IncrementIncludeCount(FE);
+      markIncluded(FE);
   }
 
   // Preprocess Predefines to populate the initial preprocessor state.

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9056f00978c8f..d806fb9e19494 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1887,10 +1887,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
   HFI.DirInfo = (Flags >> 1) & 0x07;
   HFI.IndexHeaderMapHeader = Flags & 0x01;
-  // FIXME: Find a better way to handle this. Maybe just store a
-  // "has been included" flag?
-  HFI.NumIncludes = std::max(endian::readNext<uint16_t, little, unaligned>(d),
-                             HFI.NumIncludes);
   HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
       M, endian::readNext<uint32_t, little, unaligned>(d));
   if (unsigned FrameworkOffset =
@@ -2962,6 +2958,22 @@ ASTReader::ReadControlBlock(ModuleFile &F,
   }
 }
 
+void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
+                                  Preprocessor &PP) {
+  using namespace llvm::support;
+
+  const unsigned char *D = (const unsigned char *)Blob.data();
+  unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
+
+  for (unsigned I = 0; I < FileCount; ++I) {
+    size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
+    InputFileInfo IFI = readInputFileInfo(F, ID);
+    if (llvm::ErrorOr<const FileEntry *> File =
+            PP.getFileManager().getFile(IFI.Filename))
+      PP.getIncludedFiles().insert(*File);
+  }
+}
+
 llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
                                     unsigned ClientLoadCapabilities) {
   BitstreamCursor &Stream = F.Stream;
@@ -3700,6 +3712,10 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
+    case PP_INCLUDED_FILES:
+      readIncludedFiles(F, Blob, PP);
+      break;
+
     case LATE_PARSED_TEMPLATE:
       LateParsedTemplates.emplace_back(
           std::piecewise_construct, std::forward_as_tuple(&F),

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c2bee93b077e6..763fc9537c04b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -862,6 +862,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
+  RECORD(PP_INCLUDED_FILES);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -1773,7 +1774,7 @@ namespace {
     std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
-      unsigned DataLen = 1 + 2 + 4 + 4;
+      unsigned DataLen = 1 + 4 + 4;
       for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
@@ -1805,7 +1806,6 @@ namespace {
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
       LE.write<uint8_t>(Flags);
-      LE.write<uint16_t>(Data.HFI.NumIncludes);
 
       if (!Data.HFI.ControllingMacro)
         LE.write<uint32_t>(Data.HFI.ControllingMacroID);
@@ -2254,6 +2254,29 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
   return false;
 }
 
+void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
+  using namespace llvm::support;
+
+  const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
+
+  std::vector<uint32_t> IncludedInputFileIDs;
+  IncludedInputFileIDs.reserve(IncludedFiles.size());
+
+  for (const FileEntry *File : IncludedFiles) {
+    auto InputFileIt = InputFileIDs.find(File);
+    if (InputFileIt == InputFileIDs.end())
+      continue;
+    IncludedInputFileIDs.push_back(InputFileIt->second);
+  }
+
+  llvm::sort(IncludedInputFileIDs);
+
+  endian::Writer LE(Out, little);
+  LE.write<uint32_t>(IncludedInputFileIDs.size());
+  for (uint32_t ID : IncludedInputFileIDs)
+    LE.write<uint32_t>(ID);
+}
+
 /// Writes the block containing the serialized form of the
 /// preprocessor.
 void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2462,6 +2485,20 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
                                        MacroOffsetsBase - ASTBlockStartOffset};
     Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
+
+  {
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
+    Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+    unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+    SmallString<2048> Buffer;
+    raw_svector_ostream Out(Buffer);
+    writeIncludedFiles(Out, PP);
+    RecordData::value_type Record[] = {PP_INCLUDED_FILES};
+    Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
+                              Buffer.size());
+  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,


        


More information about the cfe-commits mailing list