[clang] 6924a49 - [clang][modules] Account for non-affecting inputs in `ASTWriter`

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 1 19:32:57 PDT 2022


Author: Jan Svoboda
Date: 2022-11-01T19:31:51-07:00
New Revision: 6924a49690eee074f6c341d3ada4e98d640b8a7e

URL: https://github.com/llvm/llvm-project/commit/6924a49690eee074f6c341d3ada4e98d640b8a7e
DIFF: https://github.com/llvm/llvm-project/commit/6924a49690eee074f6c341d3ada4e98d640b8a7e.diff

LOG: [clang][modules] Account for non-affecting inputs in `ASTWriter`

In D106876, we stopped serializing module map files that didn't affect compilation of the current module.

However, since each `SourceLocation` is simply an offset into `SourceManager`'s global buffer of concatenated input files in, these need to be adjusted during serialization. Otherwise, they can incorrectly point after the buffer or into subsequent input file.

This patch starts adjusting `SourceLocation`s, `FileID`s and other `SourceManager` offsets in `ASTWriter`.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D136624

Added: 
    

Modified: 
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTWriter.cpp
    clang/test/Modules/add-remove-irrelevant-module-map.m

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index c5ad007dac330..cb929fc19bd21 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -444,8 +444,21 @@ class ASTWriter : public ASTDeserializationListener,
   std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
       ModuleFileExtensionWriters;
 
-  /// User ModuleMaps skipped when writing control block.
-  std::set<const FileEntry *> SkippedModuleMaps;
+  /// Mapping from a source location entry to whether it is affecting or not.
+  llvm::BitVector IsSLocAffecting;
+
+  /// Mapping from \c FileID to an index into the FileID adjustment table.
+  std::vector<FileID> NonAffectingFileIDs;
+  std::vector<unsigned> NonAffectingFileIDAdjustments;
+
+  /// Mapping from an offset to an index into the offset adjustment table.
+  std::vector<SourceRange> NonAffectingRanges;
+  std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;
+
+  /// Collects input files that didn't affect compilation of the current module,
+  /// and initializes data structures necessary for leaving those files out
+  /// during \c SourceManager serialization.
+  void collectNonAffectingInputFiles();
 
   /// Returns an adjusted \c FileID, accounting for any non-affecting input
   /// files.
@@ -462,6 +475,9 @@ class ASTWriter : public ASTDeserializationListener,
   /// Returns an adjusted \c SourceLocation offset, accounting for any
   /// non-affecting input files.
   SourceLocation::UIntTy getAdjustedOffset(SourceLocation::UIntTy Offset) const;
+  /// Returns an adjustment for offset into SourceManager, accounting for any
+  /// non-affecting input files.
+  SourceLocation::UIntTy getAdjustment(SourceLocation::UIntTy Offset) const;
 
   /// Retrieve or create a submodule ID for this module.
   unsigned getSubmoduleID(Module *Mod);
@@ -481,8 +497,7 @@ class ASTWriter : public ASTDeserializationListener,
   static std::pair<ASTFileSignature, ASTFileSignature>
   createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
 
-  void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
-                       std::set<const FileEntry *> &AffectingClangModuleMaps);
+  void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,
                                const Preprocessor &PP);
   void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a298d43543514..409f66c6e4da7 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -161,8 +161,8 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {
 
 namespace {
 
-std::set<const FileEntry *> GetAllModuleMaps(const HeaderSearch &HS,
-                                             Module *RootModule) {
+std::set<const FileEntry *> GetAffectingModuleMaps(const HeaderSearch &HS,
+                                                   Module *RootModule) {
   std::set<const FileEntry *> ModuleMaps{};
   std::set<const Module *> ProcessedModules;
   SmallVector<const Module *> ModulesToProcess{RootModule};
@@ -1477,15 +1477,8 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
   AddFileID(SM.getMainFileID(), Record);
   Stream.EmitRecord(ORIGINAL_FILE_ID, Record);
 
-  std::set<const FileEntry *> AffectingClangModuleMaps;
-  if (WritingModule) {
-    AffectingClangModuleMaps =
-        GetAllModuleMaps(PP.getHeaderSearchInfo(), WritingModule);
-  }
-
   WriteInputFiles(Context.SourceMgr,
-                  PP.getHeaderSearchInfo().getHeaderSearchOpts(),
-                  AffectingClangModuleMaps);
+                  PP.getHeaderSearchInfo().getHeaderSearchOpts());
   Stream.ExitBlock();
 }
 
@@ -1503,9 +1496,8 @@ struct InputFileEntry {
 
 } // namespace
 
-void ASTWriter::WriteInputFiles(
-    SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
-    std::set<const FileEntry *> &AffectingClangModuleMaps) {
+void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
+                                HeaderSearchOptions &HSOpts) {
   using namespace llvm;
 
   Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
@@ -1545,15 +1537,9 @@ void ASTWriter::WriteInputFiles(
     if (!Cache->OrigEntry)
       continue;
 
-    if (isModuleMap(File.getFileCharacteristic()) &&
-        !isSystem(File.getFileCharacteristic()) &&
-        !AffectingClangModuleMaps.empty() &&
-        AffectingClangModuleMaps.find(Cache->OrigEntry) ==
-            AffectingClangModuleMaps.end()) {
-      SkippedModuleMaps.insert(Cache->OrigEntry);
-      // Do not emit modulemaps that do not affect current module.
+    // Do not emit input files that do not affect current module.
+    if (!IsSLocAffecting[I])
       continue;
-    }
 
     InputFileEntry Entry;
     Entry.File = Cache->OrigEntry;
@@ -2064,12 +2050,9 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
     if (SLoc->isFile()) {
       const SrcMgr::FileInfo &File = SLoc->getFile();
       const SrcMgr::ContentCache *Content = &File.getContentCache();
-      if (Content->OrigEntry && !SkippedModuleMaps.empty() &&
-          SkippedModuleMaps.find(Content->OrigEntry) !=
-              SkippedModuleMaps.end()) {
-        // Do not emit files that were not listed as inputs.
+      // Do not emit files that were not listed as inputs.
+      if (!IsSLocAffecting[I])
         continue;
-      }
       SLocEntryOffsets.push_back(Offset);
       // Starting offset of this entry within this module, so skip the dummy.
       Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2);
@@ -4527,6 +4510,69 @@ static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec,
   }
 }
 
+void ASTWriter::collectNonAffectingInputFiles() {
+  SourceManager &SrcMgr = PP->getSourceManager();
+  unsigned N = SrcMgr.local_sloc_entry_size();
+
+  IsSLocAffecting.resize(N, true);
+
+  if (!WritingModule)
+    return;
+
+  auto AffectingModuleMaps =
+      GetAffectingModuleMaps(PP->getHeaderSearchInfo(), WritingModule);
+
+  unsigned FileIDAdjustment = 0;
+  unsigned OffsetAdjustment = 0;
+
+  NonAffectingFileIDAdjustments.reserve(N);
+  NonAffectingOffsetAdjustments.reserve(N);
+
+  NonAffectingFileIDAdjustments.push_back(FileIDAdjustment);
+  NonAffectingOffsetAdjustments.push_back(OffsetAdjustment);
+
+  for (unsigned I = 1; I != N; ++I) {
+    const SrcMgr::SLocEntry *SLoc = &SrcMgr.getLocalSLocEntry(I);
+    FileID FID = FileID::get(I);
+    assert(&SrcMgr.getSLocEntry(FID) == SLoc);
+
+    if (!SLoc->isFile())
+      continue;
+    const SrcMgr::FileInfo &File = SLoc->getFile();
+    const SrcMgr::ContentCache *Cache = &File.getContentCache();
+    if (!Cache->OrigEntry)
+      continue;
+
+    if (!isModuleMap(File.getFileCharacteristic()) ||
+        isSystem(File.getFileCharacteristic()) || AffectingModuleMaps.empty() ||
+        AffectingModuleMaps.find(Cache->OrigEntry) != AffectingModuleMaps.end())
+      continue;
+
+    IsSLocAffecting[I] = false;
+
+    FileIDAdjustment += 1;
+    // Even empty files take up one element in the offset table.
+    OffsetAdjustment += SrcMgr.getFileIDSize(FID) + 1;
+
+    // If the previous file was non-affecting as well, just extend its entry
+    // with our information.
+    if (!NonAffectingFileIDs.empty() &&
+        NonAffectingFileIDs.back().ID == FID.ID - 1) {
+      NonAffectingFileIDs.back() = FID;
+      NonAffectingRanges.back().setEnd(SrcMgr.getLocForEndOfFile(FID));
+      NonAffectingFileIDAdjustments.back() = FileIDAdjustment;
+      NonAffectingOffsetAdjustments.back() = OffsetAdjustment;
+      continue;
+    }
+
+    NonAffectingFileIDs.push_back(FID);
+    NonAffectingRanges.emplace_back(SrcMgr.getLocForStartOfFile(FID),
+                                    SrcMgr.getLocForEndOfFile(FID));
+    NonAffectingFileIDAdjustments.push_back(FileIDAdjustment);
+    NonAffectingOffsetAdjustments.push_back(OffsetAdjustment);
+  }
+}
+
 ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
                                          Module *WritingModule) {
   using namespace llvm;
@@ -4540,6 +4586,8 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   ASTContext &Context = SemaRef.Context;
   Preprocessor &PP = SemaRef.PP;
 
+  collectNonAffectingInputFiles();
+
   // Set up predefined declaration IDs.
   auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
     if (D) {
@@ -5229,32 +5277,65 @@ void ASTWriter::AddAlignPackInfo(const Sema::AlignPackInfo &Info,
 }
 
 FileID ASTWriter::getAdjustedFileID(FileID FID) const {
-  // TODO: Actually adjust this.
-  return FID;
+  if (FID.isInvalid() || PP->getSourceManager().isLoadedFileID(FID) ||
+      NonAffectingFileIDs.empty())
+    return FID;
+  auto It = llvm::lower_bound(NonAffectingFileIDs, FID);
+  unsigned Idx = std::distance(NonAffectingFileIDs.begin(), It);
+  unsigned Offset = NonAffectingFileIDAdjustments[Idx];
+  return FileID::get(FID.getOpaqueValue() - Offset);
 }
 
 unsigned ASTWriter::getAdjustedNumCreatedFIDs(FileID FID) const {
-  // TODO: Actually adjust this.
-  return PP->getSourceManager()
-      .getLocalSLocEntry(FID.ID)
-      .getFile()
-      .NumCreatedFIDs;
+  unsigned NumCreatedFIDs = PP->getSourceManager()
+                                .getLocalSLocEntry(FID.ID)
+                                .getFile()
+                                .NumCreatedFIDs;
+
+  unsigned AdjustedNumCreatedFIDs = 0;
+  for (unsigned I = FID.ID, N = I + NumCreatedFIDs; I != N; ++I)
+    if (IsSLocAffecting[I])
+      ++AdjustedNumCreatedFIDs;
+  return AdjustedNumCreatedFIDs;
 }
 
 SourceLocation ASTWriter::getAdjustedLocation(SourceLocation Loc) const {
-  // TODO: Actually adjust this.
-  return Loc;
+  if (Loc.isInvalid())
+    return Loc;
+  return Loc.getLocWithOffset(-getAdjustment(Loc.getOffset()));
 }
 
 SourceRange ASTWriter::getAdjustedRange(SourceRange Range) const {
-  // TODO: Actually adjust this.
-  return Range;
+  return SourceRange(getAdjustedLocation(Range.getBegin()),
+                     getAdjustedLocation(Range.getEnd()));
 }
 
 SourceLocation::UIntTy
 ASTWriter::getAdjustedOffset(SourceLocation::UIntTy Offset) const {
-  // TODO: Actually adjust this.
-  return Offset;
+  return Offset - getAdjustment(Offset);
+}
+
+SourceLocation::UIntTy
+ASTWriter::getAdjustment(SourceLocation::UIntTy Offset) const {
+  if (NonAffectingRanges.empty())
+    return 0;
+
+  if (PP->getSourceManager().isLoadedOffset(Offset))
+    return 0;
+
+  if (Offset > NonAffectingRanges.back().getEnd().getOffset())
+    return NonAffectingOffsetAdjustments.back();
+
+  if (Offset < NonAffectingRanges.front().getBegin().getOffset())
+    return 0;
+
+  auto Contains = [](const SourceRange &Range, SourceLocation::UIntTy Offset) {
+    return Range.getEnd().getOffset() < Offset;
+  };
+
+  auto It = llvm::lower_bound(NonAffectingRanges, Offset, Contains);
+  unsigned Idx = std::distance(NonAffectingRanges.begin(), It);
+  return NonAffectingOffsetAdjustments[Idx];
 }
 
 void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) {
@@ -5517,6 +5598,7 @@ void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) {
   if (FID.isInvalid())
     return;
   assert(SM.getSLocEntry(FID).isFile());
+  assert(IsSLocAffecting[FID.ID]);
 
   std::unique_ptr<DeclIDInFileInfo> &Info = FileDeclIDs[FID];
   if (!Info)

diff  --git a/clang/test/Modules/add-remove-irrelevant-module-map.m b/clang/test/Modules/add-remove-irrelevant-module-map.m
index 25c45a4cd6eb9..7e3e58037e6f2 100644
--- a/clang/test/Modules/add-remove-irrelevant-module-map.m
+++ b/clang/test/Modules/add-remove-irrelevant-module-map.m
@@ -1,58 +1,32 @@
 // RUN: rm -rf %t && mkdir %t
 // RUN: split-file %s %t
 
-//--- a.modulemap
+//--- a/module.modulemap
 module a {}
 
-//--- b.modulemap
+//--- b/module.modulemap
 module b {}
 
-//--- test-simple.m
-// expected-no-diagnostics
- at import a;
-
-// Build without b.modulemap:
-//
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \
-// RUN:   -fmodule-map-file=%t/a.modulemap %t/test-simple.m -verify
-// RUN: mv %t/cache %t/cache-without-b
-
-// Build with b.modulemap:
-//
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \
-// RUN:   -fmodule-map-file=%t/a.modulemap -fmodule-map-file=%t/b.modulemap %t/test-simple.m -verify
-// RUN: mv %t/cache %t/cache-with-b
-
-// Neither PCM file considers 'b.modulemap' an input:
-//
-// RUN: %clang_cc1 -module-file-info %t/cache-without-b/a.pcm | FileCheck %s --check-prefix=CHECK-B
-// RUN: %clang_cc1 -module-file-info %t/cache-with-b/a.pcm | FileCheck %s --check-prefix=CHECK-B
-// CHECK-B-NOT: Input file: {{.*}}b.modulemap
-
-//--- c.modulemap
-module c [no_undeclared_includes] { header "c.h" }
+//--- c/module.modulemap
+module c {}
 
-//--- c.h
-#if __has_include("d.h") // This should use 'd.modulemap' in order to determine that 'd.h'
-                         // doesn't exist for 'c' because of its '[no_undeclared_includes]'.
-#endif
-
-//--- d.modulemap
-module d { header "d.h" }
-
-//--- d.h
-// empty
+//--- module.modulemap
+module m { header "m.h" }
+//--- m.h
+ at import c;
 
-//--- test-no-undeclared-includes.m
+//--- test-simple.m
 // expected-no-diagnostics
- at import c;
+ at import m;
+
+// Build modules with the non-affecting "a/module.modulemap".
+// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
+// RUN: mv %t/cache %t/cache-with
 
-// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fdisable-module-hash \
-// RUN:   -fmodule-map-file=%t/c.modulemap -fmodule-map-file=%t/d.modulemap \
-// RUN:   %t/test-no-undeclared-includes.m -verify
+// Build modules without the non-affecting "a/module.modulemap".
+// RUN: rm -rf %t/a/module.modulemap
+// RUN: %clang_cc1 -I %t/a -I %t/b -I %t/c -I %t -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash %t/test-simple.m -verify
+// RUN: mv %t/cache %t/cache-without
 
-// The PCM file considers 'd.modulemap' an input because it affects the compilation,
-// although it doesn't describe the built module or its imports.
-//
-// RUN: %clang_cc1 -module-file-info %t/cache/c.pcm | FileCheck %s --check-prefix=CHECK-D
-// CHECK-D: Input file: {{.*}}d.modulemap
+// Check that the PCM files are bit-for-bit identical.
+// RUN: 
diff  %t/cache-with/m.pcm %t/cache-without/m.pcm


        


More information about the cfe-commits mailing list