[lld] r342334 - lld-link: Set PDB GUID to hash of PDB contents instead of to a random byte sequence.

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 15 11:37:22 PDT 2018


Author: nico
Date: Sat Sep 15 11:37:22 2018
New Revision: 342334

URL: http://llvm.org/viewvc/llvm-project?rev=342334&view=rev
Log:
lld-link: Set PDB GUID to hash of PDB contents instead of to a random byte sequence.

Previously, lld-link would use a random byte sequence as the PDB GUID. Instead,
use a hash of the PDB file contents.

To not disturb llvm-pdbutil pdb2yaml, the hash generation is an opt-in feature
on InfoStreamBuilder and ldb/COFF/PDB.cpp always sets it.

Since writing the PDB computes this ID which also goes in the exe, the PDB
writing code now must be called before writeBuildId(). writeBuildId() for that
reason is no longer included in the "Code Layout" timer.

Since the PDB GUID is now a function of the PDB contents, the PDB Age is always
set to 1. There was a long comment above loadExistingBuildId (now gone) about
how not changing the GUID and only incrementing the age was important, but
according to the discussion in PR35914 that comment was incorrect.

Differential Revision: https://reviews.llvm.org/D51956

Modified:
    lld/trunk/COFF/PDB.cpp
    lld/trunk/COFF/PDB.h
    lld/trunk/COFF/Writer.cpp
    lld/trunk/test/COFF/rsds.test

Modified: lld/trunk/COFF/PDB.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/PDB.cpp?rev=342334&r1=342333&r2=342334&view=diff
==============================================================================
--- lld/trunk/COFF/PDB.cpp (original)
+++ lld/trunk/COFF/PDB.cpp Sat Sep 15 11:37:22 2018
@@ -98,7 +98,7 @@ public:
   }
 
   /// Emit the basic PDB structure: initial streams, headers, etc.
-  void initialize(const llvm::codeview::DebugInfo &BuildId);
+  void initialize(llvm::codeview::DebugInfo *BuildId);
 
   /// Add natvis files specified on the command line.
   void addNatvisFiles();
@@ -130,8 +130,8 @@ public:
   void addSections(ArrayRef<OutputSection *> OutputSections,
                    ArrayRef<uint8_t> SectionTable);
 
-  /// Write the PDB to disk.
-  void commit();
+  /// Write the PDB to disk and store the Guid generated for it in *Guid.
+  void commit(codeview::GUID *Guid);
 
 private:
   BumpPtrAllocator Alloc;
@@ -380,8 +380,8 @@ tryToLoadPDB(const GUID &GuidFromObj, St
   return std::move(NS);
 }
 
-Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File,
-                                                               TypeServer2Record &TS) {
+Expected<const CVIndexMap &>
+PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, TypeServer2Record &TS) {
   const GUID &TSId = TS.getGuid();
   StringRef TSPath = TS.getName();
 
@@ -1230,7 +1230,7 @@ static void addLinkerModuleSectionSymbol
 void coff::createPDB(SymbolTable *Symtab,
                      ArrayRef<OutputSection *> OutputSections,
                      ArrayRef<uint8_t> SectionTable,
-                     const llvm::codeview::DebugInfo &BuildId) {
+                     llvm::codeview::DebugInfo *BuildId) {
   ScopedTimer T1(TotalPdbLinkTimer);
   PDBLinker PDB(Symtab);
 
@@ -1240,12 +1240,19 @@ void coff::createPDB(SymbolTable *Symtab
   PDB.addNatvisFiles();
 
   ScopedTimer T2(DiskCommitTimer);
-  PDB.commit();
+  codeview::GUID Guid;
+  PDB.commit(&Guid);
+  memcpy(&BuildId->PDB70.Signature, &Guid, 16);
 }
 
-void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) {
+void PDBLinker::initialize(llvm::codeview::DebugInfo *BuildId) {
   ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize
 
+  BuildId->Signature.CVSignature = OMF::Signature::PDB70;
+  // Signature is set to a hash of the PDB contents when the PDB is done.
+  memset(BuildId->PDB70.Signature, 0, 16);
+  BuildId->PDB70.Age = 1;
+
   // Create streams in MSF for predefined streams, namely
   // PDB, TPI, DBI and IPI.
   for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I)
@@ -1253,15 +1260,12 @@ void PDBLinker::initialize(const llvm::c
 
   // Add an Info stream.
   auto &InfoBuilder = Builder.getInfoBuilder();
-  GUID uuid;
-  memcpy(&uuid, &BuildId.PDB70.Signature, sizeof(uuid));
-  InfoBuilder.setAge(BuildId.PDB70.Age);
-  InfoBuilder.setGuid(uuid);
   InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70);
+  InfoBuilder.setHashPDBContentsToGUID(true);
 
   // Add an empty DBI stream.
   pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder();
-  DbiBuilder.setAge(BuildId.PDB70.Age);
+  DbiBuilder.setAge(BuildId->PDB70.Age);
   DbiBuilder.setVersionHeader(pdb::PdbDbiV70);
   DbiBuilder.setMachineType(Config->Machine);
   // Technically we are not link.exe 14.11, but there are known cases where
@@ -1305,9 +1309,9 @@ void PDBLinker::addSections(ArrayRef<Out
       DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable));
 }
 
-void PDBLinker::commit() {
+void PDBLinker::commit(codeview::GUID *Guid) {
   // Write to a file.
-  ExitOnErr(Builder.commit(Config->PDBPath));
+  ExitOnErr(Builder.commit(Config->PDBPath, Guid));
 }
 
 static Expected<StringRef>

Modified: lld/trunk/COFF/PDB.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/PDB.h?rev=342334&r1=342333&r2=342334&view=diff
==============================================================================
--- lld/trunk/COFF/PDB.h (original)
+++ lld/trunk/COFF/PDB.h Sat Sep 15 11:37:22 2018
@@ -28,7 +28,7 @@ class SymbolTable;
 void createPDB(SymbolTable *Symtab,
                llvm::ArrayRef<OutputSection *> OutputSections,
                llvm::ArrayRef<uint8_t> SectionTable,
-               const llvm::codeview::DebugInfo &BuildId);
+               llvm::codeview::DebugInfo *BuildId);
 
 std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C,
                                                  uint32_t Addr);

Modified: lld/trunk/COFF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Writer.cpp?rev=342334&r1=342333&r2=342334&view=diff
==============================================================================
--- lld/trunk/COFF/Writer.cpp (original)
+++ lld/trunk/COFF/Writer.cpp Sat Sep 15 11:37:22 2018
@@ -210,7 +210,6 @@ private:
   DebugDirectoryChunk *DebugDirectory = nullptr;
   std::vector<Chunk *> DebugRecords;
   CVDebugRecordChunk *BuildId = nullptr;
-  Optional<codeview::DebugInfo> PreviousBuildId;
   ArrayRef<uint8_t> SectionTable;
 
   uint64_t FileSize;
@@ -293,67 +292,6 @@ void OutputSection::writeHeaderTo(uint8_
 } // namespace coff
 } // namespace lld
 
-// PDBs are matched against executables using a build id which consists of three
-// components:
-//   1. A 16-bit GUID
-//   2. An age
-//   3. A time stamp.
-//
-// Debuggers and symbol servers match executables against debug info by checking
-// each of these components of the EXE/DLL against the corresponding value in
-// the PDB and failing a match if any of the components differ.  In the case of
-// symbol servers, symbols are cached in a folder that is a function of the
-// GUID.  As a result, in order to avoid symbol cache pollution where every
-// incremental build copies a new PDB to the symbol cache, we must try to re-use
-// the existing GUID if one exists, but bump the age.  This way the match will
-// fail, so the symbol cache knows to use the new PDB, but the GUID matches, so
-// it overwrites the existing item in the symbol cache rather than making a new
-// one.
-static Optional<codeview::DebugInfo> loadExistingBuildId(StringRef Path) {
-  // We don't need to incrementally update a previous build id if we're not
-  // writing codeview debug info.
-  if (!Config->Debug)
-    return None;
-
-  auto ExpectedBinary = llvm::object::createBinary(Path);
-  if (!ExpectedBinary) {
-    consumeError(ExpectedBinary.takeError());
-    return None;
-  }
-
-  auto Binary = std::move(*ExpectedBinary);
-  if (!Binary.getBinary()->isCOFF())
-    return None;
-
-  std::error_code EC;
-  COFFObjectFile File(Binary.getBinary()->getMemoryBufferRef(), EC);
-  if (EC)
-    return None;
-
-  // If the machine of the binary we're outputting doesn't match the machine
-  // of the existing binary, don't try to re-use the build id.
-  if (File.is64() != Config->is64() || File.getMachine() != Config->Machine)
-    return None;
-
-  for (const auto &DebugDir : File.debug_directories()) {
-    if (DebugDir.Type != IMAGE_DEBUG_TYPE_CODEVIEW)
-      continue;
-
-    const codeview::DebugInfo *ExistingDI = nullptr;
-    StringRef PDBFileName;
-    if (auto EC = File.getDebugPDBInfo(ExistingDI, PDBFileName)) {
-      (void)EC;
-      return None;
-    }
-    // We only support writing PDBs in v70 format.  So if this is not a build
-    // id that we recognize / support, ignore it.
-    if (ExistingDI->Signature.CVSignature != OMF::Signature::PDB70)
-      return None;
-    return *ExistingDI;
-  }
-  return None;
-}
-
 // The main function of the writer.
 void Writer::run() {
   ScopedTimer T1(CodeLayoutTimer);
@@ -372,9 +310,6 @@ void Writer::run() {
     fatal("image size (" + Twine(FileSize) + ") " +
         "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");
 
-  // We must do this before opening the output file, as it depends on being able
-  // to read the contents of the existing output file.
-  PreviousBuildId = loadExistingBuildId(Config->OutputFile);
   openFile(Config->OutputFile);
   if (Config->is64()) {
     writeHeader<pe32plus_header>();
@@ -383,14 +318,14 @@ void Writer::run() {
   }
   writeSections();
   sortExceptionTable();
-  writeBuildId();
 
   T1.stop();
 
   if (!Config->PDBPath.empty() && Config->Debug) {
     assert(BuildId);
-    createPDB(Symtab, OutputSections, SectionTable, *BuildId->BuildId);
+    createPDB(Symtab, OutputSections, SectionTable, BuildId->BuildId);
   }
+  writeBuildId();
 
   writeMapFile(OutputSections);
 
@@ -1261,25 +1196,10 @@ void Writer::writeBuildId() {
   //    timestamp as well as a Guid and Age of the PDB.
   // 2) In all cases, the PE COFF file header also contains a timestamp.
   // For reproducibility, instead of a timestamp we want to use a hash of the
-  // binary, however when building with debug info the hash needs to take into
-  // account the debug info, since it's possible to add blank lines to a file
-  // which causes the debug info to change but not the generated code.
-  //
-  // To handle this, we first set the Guid and Age in the debug directory (but
-  // only if we're doing a debug build).  Then, we hash the binary (thus causing
-  // the hash to change if only the debug info changes, since the Age will be
-  // different).  Finally, we write that hash into the debug directory (if
-  // present) as well as the COFF file header (always).
+  // PE contents.
   if (Config->Debug) {
     assert(BuildId && "BuildId is not set!");
-    if (PreviousBuildId.hasValue()) {
-      *BuildId->BuildId = *PreviousBuildId;
-      BuildId->BuildId->PDB70.Age = BuildId->BuildId->PDB70.Age + 1;
-    } else {
-      BuildId->BuildId->Signature.CVSignature = OMF::Signature::PDB70;
-      BuildId->BuildId->PDB70.Age = 1;
-      llvm::getRandomBytes(BuildId->BuildId->PDB70.Signature, 16);
-    }
+    // BuildId->BuildId was filled in when the PDB was written.
   }
 
   // At this point the only fields in the COFF file which remain unset are the

Modified: lld/trunk/test/COFF/rsds.test
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/COFF/rsds.test?rev=342334&r1=342333&r2=342334&view=diff
==============================================================================
--- lld/trunk/test/COFF/rsds.test (original)
+++ lld/trunk/test/COFF/rsds.test Sat Sep 15 11:37:22 2018
@@ -1,9 +1,9 @@
 # RUN: yaml2obj %s > %t.obj
 
 # RUN: rm -f %t.dll %t.pdb
-# RUN: lld-link /debug /pdbaltpath:test1.pdb /dll /out:%t.dll /entry:DllMain %t.obj
+# RUN: lld-link /debug /pdbaltpath:test.pdb /dll /out:%t.dll /entry:DllMain %t.obj
 # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.1.txt
-# RUN: lld-link /debug /pdbaltpath:test2.pdb /dll /out:%t.dll /entry:DllMain %t.obj
+# RUN: lld-link /debug /pdbaltpath:test.pdb /dll /out:%t.dll /entry:DllMain %t.obj
 # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.2.txt
 # RUN: cat %t.1.txt %t.2.txt | FileCheck %s
 
@@ -12,7 +12,7 @@
 # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.3.txt
 # RUN: lld-link /debug /pdb:%t2.pdb /dll /out:%t.dll /entry:DllMain %t.obj
 # RUN: llvm-readobj -coff-debug-directory %t.dll > %t.4.txt
-# RUN: cat %t.3.txt %t.4.txt | FileCheck %s
+# RUN: cat %t.3.txt %t.4.txt | FileCheck --check-prefix TWOPDBS %s
 
 # RUN: rm -f %t.dll %t.pdb
 # RUN: lld-link /Brepro /dll /out:%t.dll /entry:DllMain %t.obj
@@ -37,7 +37,7 @@
 # CHECK:       PDBSignature: 0x53445352
 # CHECK:       PDBGUID: [[GUID:\(([A-Za-z0-9]{2} ?){16}\)]]
 # CHECK:       PDBAge: 1
-# CHECK:       PDBFileName: {{.*}}1.pdb
+# CHECK:       PDBFileName: {{.*}}.pdb
 # CHECK:     }
 # CHECK:   }
 # CHECK: ]
@@ -55,12 +55,51 @@
 # CHECK:     PDBInfo {
 # CHECK:       PDBSignature: 0x53445352
 # CHECK:       PDBGUID: [[GUID]]
-# CHECK:       PDBAge: 2
-# CHECK:       PDBFileName: {{.*}}2.pdb
+# CHECK:       PDBAge: 1
+# CHECK:       PDBFileName: {{.*}}.pdb
 # CHECK:     }
 # CHECK:   }
 # CHECK: ]
 
+# TWOPDBS: File: [[FILE:.*]].dll
+# TWOPDBS: DebugDirectory [
+# TWOPDBS:   DebugEntry {
+# TWOPDBS:     Characteristics: 0x0
+# TWOPDBS:     TimeDateStamp: 
+# TWOPDBS:     MajorVersion: 0x0
+# TWOPDBS:     MinorVersion: 0x0
+# TWOPDBS:     Type: CodeView (0x2)
+# TWOPDBS:     SizeOfData: 0x{{[^0]}}
+# TWOPDBS:     AddressOfRawData: 0x{{[^0]}}
+# TWOPDBS:     PointerToRawData: 0x{{[^0]}}
+# TWOPDBS:     PDBInfo {
+# TWOPDBS:       PDBSignature: 0x53445352
+# TWOPDBS:       PDBGUID: [[GUID:\(([A-Za-z0-9]{2} ?){16}\)]]
+# TWOPDBS:       PDBAge: 1
+# TWOPDBS:       PDBFileName: {{.*}}.pdb
+# TWOPDBS:     }
+# TWOPDBS:   }
+# TWOPDBS: ]
+# TWOPDBS: File: [[FILE]].dll
+# TWOPDBS: DebugDirectory [
+# TWOPDBS:   DebugEntry {
+# TWOPDBS:     Characteristics: 0x0
+# TWOPDBS:     TimeDateStamp: 
+# TWOPDBS:     MajorVersion: 0x0
+# TWOPDBS:     MinorVersion: 0x0
+# TWOPDBS:     Type: CodeView (0x2)
+# TWOPDBS:     SizeOfData: 0x{{[^0]}}
+# TWOPDBS:     AddressOfRawData: 0x{{[^0]}}
+# TWOPDBS:     PointerToRawData: 0x{{[^0]}}
+# TWOPDBS:     PDBInfo {
+# TWOPDBS:       PDBSignature: 0x53445352
+# TWOPDBS-NOT:       PDBGUID: [[GUID]]
+# TWOPDBS:       PDBAge: 1
+# TWOPDBS:       PDBFileName: {{.*}}.pdb
+# TWOPDBS:     }
+# TWOPDBS:   }
+# TWOPDBS: ]
+
 # REPRO: File: {{.*}}.dll
 # REPRO: DebugDirectory [
 # REPRO:   DebugEntry {




More information about the llvm-commits mailing list