[clang] 6fb08d8 - Reland "[clang][modules] Move `UNHASHED_CONTROL_BLOCK` up in the AST file"

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Mon Aug 28 09:54:47 PDT 2023


Author: Jan Svoboda
Date: 2023-08-28T09:54:39-07:00
New Revision: 6fb08d8f558a6f28db7835acdb88cab83aea2eb4

URL: https://github.com/llvm/llvm-project/commit/6fb08d8f558a6f28db7835acdb88cab83aea2eb4
DIFF: https://github.com/llvm/llvm-project/commit/6fb08d8f558a6f28db7835acdb88cab83aea2eb4.diff

LOG: Reland "[clang][modules] Move `UNHASHED_CONTROL_BLOCK` up in the AST file"

This reverts commit b6ba804f7775f89f230ee1e62526a2f8225c7966, effectively relanding commit 7d1565727dad3acb54fe76a908630843835d7bc8.

The original commit incorrectly called `ASTWriter::writeUnhashedControlBlock()` before `ASTWriter::collectNonAffectingInputFiles()`, causing SourceLocations/FileIDs in the pragma diagnostic mappings block to be invalid. This is now tested by `clang/test/Modules/diag-mappings-affecting.c`.

Added: 
    clang/test/Modules/diag-mappings-affecting.c

Modified: 
    clang/include/clang/Basic/Module.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Serialization/GlobalModuleIndex.cpp
    clang/test/Modules/ASTSignature.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index a4ad8ad2f768fe..7f1eb6faecf4e7 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -81,6 +81,12 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
     return Sentinel;
   }
 
+  static ASTFileSignature createDummy() {
+    ASTFileSignature Dummy;
+    Dummy.fill(0x00);
+    return Dummy;
+  }
+
   template <typename InputIt>
   static ASTFileSignature create(InputIt First, InputIt Last) {
     assert(std::distance(First, Last) == size &&

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index dcc7bdf9b4eaac..9e115f2a5cce3f 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@ namespace serialization {
 /// Version 4 of AST files also requires that the version control branch and
 /// revision match exactly, since there is no backward compatibility of
 /// AST files at this time.
-const unsigned VERSION_MAJOR = 28;
+const unsigned VERSION_MAJOR = 29;
 
 /// AST file minor version number supported by this version of
 /// Clang.

diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index b9d197c15c1344..f2c7c03ff09360 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -128,10 +128,17 @@ class ASTWriter : public ASTDeserializationListener,
   /// The module we're currently writing, if any.
   Module *WritingModule = nullptr;
 
-  /// The offset of the first bit inside the AST_BLOCK.
+  /// The byte range representing all the UNHASHED_CONTROL_BLOCK.
+  std::pair<uint64_t, uint64_t> UnhashedControlBlockRange;
+  /// The bit offset of the AST block hash blob.
+  uint64_t ASTBlockHashOffset = 0;
+  /// The bit offset of the signature blob.
+  uint64_t SignatureOffset = 0;
+
+  /// The bit offset of the first bit inside the AST_BLOCK.
   uint64_t ASTBlockStartOffset = 0;
 
-  /// The range representing all the AST_BLOCK.
+  /// The byte range representing all the AST_BLOCK.
   std::pair<uint64_t, uint64_t> ASTBlockRange;
 
   /// The base directory for any relative paths we emit.
@@ -495,12 +502,11 @@ class ASTWriter : public ASTDeserializationListener,
                          StringRef isysroot);
 
   /// Write out the signature and diagnostic options, and return the signature.
-  ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP,
-                                             ASTContext &Context);
+  void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context);
+  ASTFileSignature backpatchSignature();
 
   /// Calculate hash of the pcm content.
-  static std::pair<ASTFileSignature, ASTFileSignature>
-  createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
+  std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
 
   void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 82dac5fbfc888d..e707f346a2c652 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -4734,12 +4734,6 @@ ASTReader::ReadASTCore(StringRef FileName,
       ShouldFinalizePCM = true;
       return Success;
 
-    case UNHASHED_CONTROL_BLOCK_ID:
-      // This block is handled using look-ahead during ReadControlBlock.  We
-      // shouldn't get here!
-      Error("malformed block record in AST file");
-      return Failure;
-
     default:
       if (llvm::Error Err = Stream.SkipBlock()) {
         Error(std::move(Err));
@@ -4859,13 +4853,18 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
     }
     switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
     case SIGNATURE:
-      if (F)
-        F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
+      if (F) {
+        F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
+        assert(F->Signature != ASTFileSignature::createDummy() &&
+               "Dummy AST file signature not backpatched in ASTWriter.");
+      }
       break;
     case AST_BLOCK_HASH:
-      if (F)
-        F->ASTBlockHash =
-            ASTFileSignature::create(Record.begin(), Record.end());
+      if (F) {
+        F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end());
+        assert(F->ASTBlockHash != ASTFileSignature::createDummy() &&
+               "Dummy AST block hash not backpatched in ASTWriter.");
+      }
       break;
     case DIAGNOSTIC_OPTIONS: {
       bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
@@ -5167,9 +5166,12 @@ static ASTFileSignature readASTFileSignature(StringRef PCH) {
       consumeError(MaybeRecord.takeError());
       return ASTFileSignature();
     }
-    if (SIGNATURE == MaybeRecord.get())
-      return ASTFileSignature::create(Record.begin(),
-                                      Record.begin() + ASTFileSignature::size);
+    if (SIGNATURE == MaybeRecord.get()) {
+      auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
+      assert(Signature != ASTFileSignature::createDummy() &&
+             "Dummy AST file signature not backpatched in ASTWriter.");
+      return Signature;
+    }
   }
 }
 

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 2a12779b0d7d12..c9148b67c06ea9 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1120,50 +1120,97 @@ adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
 }
 
 std::pair<ASTFileSignature, ASTFileSignature>
-ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
+ASTWriter::createSignature() const {
+  StringRef AllBytes(Buffer.data(), Buffer.size());
+
   llvm::SHA1 Hasher;
-  Hasher.update(ASTBlockBytes);
+  Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second));
   ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
 
-  // Add the remaining bytes (i.e. bytes before the unhashed control block that
-  // are not part of the AST block).
-  Hasher.update(
-      AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
+  // Add the remaining bytes:
+  //  1. Before the unhashed control block.
+  Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first));
+  //  2. Between the unhashed control block and the AST block.
   Hasher.update(
-      AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
+      AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first));
+  //  3. After the AST block.
+  Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos));
   ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
 
   return std::make_pair(ASTBlockHash, Signature);
 }
 
-ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
-                                                      ASTContext &Context) {
+ASTFileSignature ASTWriter::backpatchSignature() {
+  if (!WritingModule ||
+      !PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
+    return {};
+
+  // For implicit modules, write the hash of the PCM as its signature.
+
+  auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
+    using WordT = unsigned;
+    std::array<WordT, sizeof(ASTFileSignature) / sizeof(WordT)> Words;
+    static_assert(sizeof(Words) == sizeof(S));
+    std::memcpy(Words.data(), S.data(), sizeof(ASTFileSignature));
+    for (WordT Word : Words) {
+      Stream.BackpatchWord(BitNo, Word);
+      BitNo += sizeof(WordT) * 8;
+    }
+  };
+
+  ASTFileSignature ASTBlockHash;
+  ASTFileSignature Signature;
+  std::tie(ASTBlockHash, Signature) = createSignature();
+
+  BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
+  BackpatchSignatureAt(Signature, SignatureOffset);
+
+  return Signature;
+}
+
+void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
+                                          ASTContext &Context) {
   using namespace llvm;
 
   // Flush first to prepare the PCM hash (signature).
   Stream.FlushToWord();
-  auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
+  UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3;
 
   // Enter the block and prepare to write records.
   RecordData Record;
   Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
 
   // For implicit modules, write the hash of the PCM as its signature.
-  ASTFileSignature Signature;
   if (WritingModule &&
       PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
-    ASTFileSignature ASTBlockHash;
-    auto ASTBlockStartByte = ASTBlockRange.first >> 3;
-    auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
-    std::tie(ASTBlockHash, Signature) = createSignature(
-        StringRef(Buffer.begin(), StartOfUnhashedControl),
-        StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
-
-    Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
-    Stream.EmitRecord(AST_BLOCK_HASH, Record);
+    // At this point, we don't know the actual signature of the file or the AST
+    // block - we're only able to compute those at the end of the serialization
+    // process. Let's store dummy signatures for now, and replace them with the
+    // real ones later on.
+    // The bitstream VBR-encodes record elements, which makes backpatching them
+    // really 
diff icult. Let's store the signatures as blobs instead - they are
+    // guaranteed to be word-aligned, and we control their format/encoding.
+    auto Dummy = ASTFileSignature::createDummy();
+    SmallString<128> Blob{Dummy.begin(), Dummy.end()};
+
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
+    Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+    unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+    Abbrev = std::make_shared<BitCodeAbbrev>();
+    Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+    unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+    Record.push_back(AST_BLOCK_HASH);
+    Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
+    ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
     Record.clear();
-    Record.append(Signature.begin(), Signature.end());
-    Stream.EmitRecord(SIGNATURE, Record);
+
+    Record.push_back(SIGNATURE);
+    Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
+    SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
     Record.clear();
   }
 
@@ -1232,7 +1279,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
 
   // Leave the options block.
   Stream.ExitBlock();
-  return Signature;
+  UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3;
 }
 
 /// Write the control block.
@@ -4691,8 +4738,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   ASTContext &Context = SemaRef.Context;
   Preprocessor &PP = SemaRef.PP;
 
+  // This needs to be done very early, since everything that writes
+  // SourceLocations or FileIDs depends on it.
   collectNonAffectingInputFiles();
 
+  writeUnhashedControlBlock(PP, Context);
+
   // Set up predefined declaration IDs.
   auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
     if (D) {
@@ -4838,7 +4889,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
   // Write the remaining AST contents.
   Stream.FlushToWord();
-  ASTBlockRange.first = Stream.GetCurrentBitNo();
+  ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3;
   Stream.EnterSubblock(AST_BLOCK_ID, 5);
   ASTBlockStartOffset = Stream.GetCurrentBitNo();
 
@@ -5191,13 +5242,13 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   Stream.EmitRecord(STATISTICS, Record);
   Stream.ExitBlock();
   Stream.FlushToWord();
-  ASTBlockRange.second = Stream.GetCurrentBitNo();
+  ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3;
 
   // Write the module file extension blocks.
   for (const auto &ExtWriter : ModuleFileExtensionWriters)
     WriteModuleFileExtension(SemaRef, *ExtWriter);
 
-  return writeUnhashedControlBlock(PP, Context);
+  return backpatchSignature();
 }
 
 void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {

diff  --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp
index d57f4cec2eab6f..92417c7bf1d524 100644
--- a/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -697,9 +697,12 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
     }
 
     // Get Signature.
-    if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
-      getModuleFileInfo(File).Signature = ASTFileSignature::create(
-          Record.begin(), Record.begin() + ASTFileSignature::size);
+    if (State == DiagnosticOptionsBlock && Code == SIGNATURE) {
+      auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end());
+      assert(Signature != ASTFileSignature::createDummy() &&
+             "Dummy AST file signature not backpatched in ASTWriter.");
+      getModuleFileInfo(File).Signature = Signature;
+    }
 
     // We don't care about this record.
   }

diff  --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c
index f0df1e4e78236f..11d7c5c9d43d53 100644
--- a/clang/test/Modules/ASTSignature.c
+++ b/clang/test/Modules/ASTSignature.c
@@ -1,6 +1,6 @@
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
-// RUN:   -fimplicit-module-maps -fmodules-strict-context-hash \
+// RUN: %clang_cc1             -iquote %S/Inputs/ASTHash/ -fsyntax-only \
+// RUN:   -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
 // RUN:   -fmodules-cache-path=%t -fdisable-module-hash %s
 // RUN: cp %t/MyHeader2.pcm %t1.pcm
 // RUN: rm -rf %t
@@ -8,17 +8,18 @@
 // RUN:   -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
 // RUN:   -fmodules-cache-path=%t -fdisable-module-hash %s
 // RUN: cp %t/MyHeader2.pcm %t2.pcm
-// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
-// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
+// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump
+// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump
 // RUN: cat %t1.dump %t2.dump | FileCheck %s
 
 #include "my_header_2.h"
 
 my_int var = 42;
 
-// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
-// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
-// CHECK: [[AST_BLOCK_HASH]]
-// CHECK-NOT: [[SIGNATURE]]
-// The modules built by this test are designed to yield the same AST. If this
-// test fails, it means that the AST block is has become non-relocatable.
+// CHECK:     <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH:.*]]'
+// CHECK:     <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE:.*]]'
+// CHECK:     <AST_BLOCK_HASH abbrevid={{[0-9]+}}/> blob data = '[[AST_BLOCK_HASH]]'
+// CHECK-NOT: <SIGNATURE abbrevid={{[0-9]+}}/> blob data = '[[SIGNATURE]]'
+// The modules built by this test are designed to yield the same AST but distinct AST files.
+// If this test fails, it means that either the AST block has become non-relocatable,
+// or the file signature stopped hashing some parts of the AST file.

diff  --git a/clang/test/Modules/diag-mappings-affecting.c b/clang/test/Modules/diag-mappings-affecting.c
new file mode 100644
index 00000000000000..f485fe8e3f2e53
--- /dev/null
+++ b/clang/test/Modules/diag-mappings-affecting.c
@@ -0,0 +1,28 @@
+// Test that pruning non-affecting input files happens before serializing
+// diagnostic pragma mappings.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache \
+// RUN:   -I %t/include_a -I %t/include_textual -fsyntax-only %t/tu.c
+
+//--- tu.c
+#include "a1.h"
+
+//--- include_a/module.modulemap
+module A {
+  header "a1.h"
+  header "a2.h"
+}
+//--- include_a/a1.h
+#include "textual.h" // This will also load the non-affecting
+                     // include_textual/module.modulemap.
+#include "a2.h"
+//--- include_a/a2.h
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#pragma clang diagnostic pop
+
+//--- include_textual/module.modulemap
+//--- include_textual/textual.h


        


More information about the cfe-commits mailing list