[llvm] r267928 - Read the rest of the DBI substreams, and parse source info.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 28 13:05:19 PDT 2016


Author: zturner
Date: Thu Apr 28 15:05:18 2016
New Revision: 267928

URL: http://llvm.org/viewvc/llvm-project?rev=267928&view=rev
Log:
Read the rest of the DBI substreams, and parse source info.

We now read out the rest of the substreams from the DBI stream.  One of
these substreams, the FileInfo substream, contains information about which
source files contribute to each module (aka compiland).  This patch
additionally parses out the file information from that substream, and
dumps it in llvm-pdbdump.

Differential Revision: http://reviews.llvm.org/D19634
Reviewed by: ruiu

Modified:
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ModInfo.h
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h
    llvm/trunk/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp
    llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
    llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ModInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ModInfo.h?rev=267928&r1=267927&r2=267928&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ModInfo.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/ModInfo.h Thu Apr 28 15:05:18 2016
@@ -13,6 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 
 #include <stdint.h>
+#include <vector>
 
 namespace llvm {
 class PDBFile;
@@ -42,6 +43,13 @@ private:
   const FileLayout *Layout;
 };
 
+struct ModuleInfoEx {
+  ModuleInfoEx(ModInfo Module) : Info(Module) {}
+
+  ModInfo Info;
+  std::vector<StringRef> SourceFiles;
+};
+
 class ModInfoIterator {
 public:
   ModInfoIterator(const uint8_t *Stream);

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h?rev=267928&r1=267927&r2=267928&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBDbiStream.h Thu Apr 28 15:05:18 2016
@@ -43,13 +43,23 @@ public:
 
   PDB_Machine getMachineType() const;
 
-  llvm::iterator_range<ModInfoIterator> modules() const;
+  ArrayRef<ModuleInfoEx> modules() const;
 
 private:
+  std::error_code readSubstream(std::vector<uint8_t> &Bytes, uint32_t Size);
+  std::error_code initializeFileInfo();
+
   PDBFile &Pdb;
   PDBStream Stream;
 
+  std::vector<ModuleInfoEx> ModuleInfos;
+
   std::vector<uint8_t> ModInfoSubstream;
+  std::vector<uint8_t> SecContrSubstream;
+  std::vector<uint8_t> SecMapSubstream;
+  std::vector<uint8_t> FileInfoSubstream;
+  std::vector<uint8_t> TypeServerMapSubstream;
+  std::vector<uint8_t> ECSubstream;
   std::unique_ptr<HeaderInfo> Header;
 };
 }

Modified: llvm/trunk/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp?rev=267928&r1=267927&r2=267928&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Raw/PDBDbiStream.cpp Thu Apr 28 15:05:18 2016
@@ -48,16 +48,16 @@ struct PDBDbiStream::HeaderInfo {
   little32_t VersionSignature;
   ulittle32_t VersionHeader;
   ulittle32_t Age; // Should match PDBInfoStream.
-  ulittle16_t GSSyms;
-  ulittle16_t BuildNumber; // See DbiBuildNo structure.
-  ulittle16_t PSSyms;
+  ulittle16_t GSSyms;               // Number of global symbols
+  ulittle16_t BuildNumber;          // See DbiBuildNo structure.
+  ulittle16_t PSSyms;               // Number of public symbols
   ulittle16_t PdbDllVersion;        // version of mspdbNNN.dll
   ulittle16_t SymRecords;           // Number of symbols
   ulittle16_t PdbDllRbld;           // rbld number of mspdbNNN.dll
   little32_t ModiSubstreamSize;     // Size of module info stream
   little32_t SecContrSubstreamSize; // Size of sec. contribution stream
-  little32_t SectionMapSize;
-  little32_t FileInfoSize;
+  little32_t SectionMapSize;        // Size of sec. map substream
+  little32_t FileInfoSize;          // Size of file info substream
   little32_t TypeServerSize;      // Size of type server map
   ulittle32_t MFCTypeServerIndex; // Index of MFC Type Server
   little32_t OptionalDbgHdrSize;  // Size of DbgHeader info
@@ -101,12 +101,42 @@ std::error_code PDBDbiStream::reload() {
           Header->OptionalDbgHdrSize + Header->ECSubstreamSize)
     return std::make_error_code(std::errc::illegal_byte_sequence);
 
+  // Only certain substreams are guaranteed to be aligned.  Validate
+  // them here.
   if (Header->ModiSubstreamSize % sizeof(uint32_t) != 0)
     return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (Header->SecContrSubstreamSize % sizeof(uint32_t) != 0)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (Header->SectionMapSize % sizeof(uint32_t) != 0)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (Header->FileInfoSize % sizeof(uint32_t) != 0)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (Header->TypeServerSize % sizeof(uint32_t) != 0)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  std::error_code EC;
+  if (EC = readSubstream(ModInfoSubstream, Header->ModiSubstreamSize))
+    return EC;
+
+  // Since each ModInfo in the stream has variable length, we have to iterate
+  // them to know how many there actually are.
+  auto Range = llvm::make_range(ModInfoIterator(&ModInfoSubstream.front()),
+                                ModInfoIterator(&ModInfoSubstream.back() + 1));
+  for (auto Info : Range)
+    ModuleInfos.push_back(ModuleInfoEx(Info));
+
+  if (EC = readSubstream(SecContrSubstream, Header->SecContrSubstreamSize))
+    return EC;
+  if (EC = readSubstream(SecMapSubstream, Header->SectionMapSize))
+    return EC;
+  if (EC = readSubstream(FileInfoSubstream, Header->FileInfoSize))
+    return EC;
+  if (EC = readSubstream(TypeServerMapSubstream, Header->TypeServerSize))
+    return EC;
+  if (EC = readSubstream(ECSubstream, Header->ECSubstreamSize))
+    return EC;
 
-  ModInfoSubstream.resize(Header->ModiSubstreamSize);
-  if (auto EC =
-          Stream.readBytes(&ModInfoSubstream[0], Header->ModiSubstreamSize))
+  if (EC = initializeFileInfo())
     return EC;
 
   return std::error_code();
@@ -150,7 +180,90 @@ PDB_Machine PDBDbiStream::getMachineType
   return static_cast<PDB_Machine>(Machine);
 }
 
-llvm::iterator_range<ModInfoIterator> PDBDbiStream::modules() const {
-  return llvm::make_range(ModInfoIterator(&ModInfoSubstream.front()),
-                          ModInfoIterator(&ModInfoSubstream.back() + 1));
+ArrayRef<ModuleInfoEx> PDBDbiStream::modules() const { return ModuleInfos; }
+
+std::error_code PDBDbiStream::readSubstream(std::vector<uint8_t> &Bytes, uint32_t Size) {
+  Bytes.clear();
+  if (Size == 0)
+    return std::error_code();
+
+  Bytes.resize(Size);
+  return Stream.readBytes(&Bytes[0], Size);
+}
+
+std::error_code PDBDbiStream::initializeFileInfo() {
+  struct FileInfoSubstreamHeader {
+    ulittle16_t NumModules;     // Total # of modules, should match number of
+                                // records in the ModuleInfo substream.
+    ulittle16_t NumSourceFiles; // Total # of source files.  This value is not
+                                // accurate because PDB actually supports more
+                                // than 64k source files, so we ignore it and
+                                // compute the value from other stream fields.
+  };
+
+  // The layout of the FileInfoSubstream is like this:
+  // struct {
+  //   ulittle16_t NumModules;
+  //   ulittle16_t NumSourceFiles;
+  //   ulittle16_t ModIndices[NumModules];
+  //   ulittle16_t ModFileCounts[NumModules];
+  //   ulittle32_t FileNameOffsets[NumSourceFiles];
+  //   char Names[][NumSourceFiles];
+  // };
+  // with the caveat that `NumSourceFiles` cannot be trusted, so
+  // it is computed by summing `ModFileCounts`.
+  //
+  const uint8_t *Buf = &FileInfoSubstream[0];
+  auto FI = reinterpret_cast<const FileInfoSubstreamHeader *>(Buf);
+  Buf += sizeof(FileInfoSubstreamHeader);
+  // The number of modules in the stream should be the same as reported by
+  // the FileInfoSubstreamHeader.
+  if (FI->NumModules != ModuleInfos.size())
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  // First is an array of `NumModules` module indices.  This is not used for the
+  // same reason that `NumSourceFiles` is not used.  It's an array of uint16's,
+  // but it's possible there are more than 64k source files, which would imply
+  // more than 64k modules (e.g. object files) as well.  So we ignore this
+  // field.
+  llvm::ArrayRef<ulittle16_t> ModIndexArray(
+      reinterpret_cast<const ulittle16_t *>(Buf), ModuleInfos.size());
+
+  llvm::ArrayRef<ulittle16_t> ModFileCountArray(ModIndexArray.end(),
+                                                ModuleInfos.size());
+
+  // Compute the real number of source files.
+  uint32_t NumSourceFiles = 0;
+  for (auto Count : ModFileCountArray)
+    NumSourceFiles += Count;
+
+  // This is the array that in the reference implementation corresponds to
+  // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a
+  // pointer. Due to the mentioned problems of pointers causing difficulty
+  // when reading from the file on 64-bit systems, we continue to ignore that
+  // field in `ModInfo`, and instead build a vector of StringRefs and store
+  // them in `ModuleInfoEx`.  The value written to and read from the file is
+  // not used anyway, it is only there as a way to store the offsets for the
+  // purposes of later accessing the names at runtime.
+  llvm::ArrayRef<little32_t> FileNameOffsets(
+      reinterpret_cast<const little32_t *>(ModFileCountArray.end()),
+      NumSourceFiles);
+
+  const char *Names = reinterpret_cast<const char *>(FileNameOffsets.end());
+
+  // We go through each ModuleInfo, determine the number N of source files for
+  // that module, and then get the next N offsets from the Offsets array, using
+  // them to get the corresponding N names from the Names buffer and associating
+  // each one with the corresponding module.
+  uint32_t NextFileIndex = 0;
+  for (size_t I = 0; I < ModuleInfos.size(); ++I) {
+    uint32_t NumFiles = ModFileCountArray[I];
+    ModuleInfos[I].SourceFiles.resize(NumFiles);
+    for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) {
+      uint32_t FileIndex = FileNameOffsets[NextFileIndex];
+      ModuleInfos[I].SourceFiles[J] = StringRef(Names + FileIndex);
+    }
+  }
+
+  return std::error_code();
 }

Modified: llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test?rev=267928&r1=267927&r2=267928&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test (original)
+++ llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test Thu Apr 28 15:05:18 2016
@@ -41,6 +41,8 @@
 ; CHECK-NEXT:     Symbol Byte Size: 208
 ; CHECK-NEXT:     Type Server Index: 0
 ; CHECK-NEXT:     Has EC Info: 0
+; CHECK-NEXT:     1 Contributing Source Files:
+; CHECK-NEXT:         d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
 ; CHECK-NEXT: * Linker *
 ; CHECK-NEXT:     Debug Stream Index: 14
 ; CHECK-NEXT:     Object File:
@@ -52,3 +54,4 @@
 ; CHECK-NEXT:     Symbol Byte Size: 516
 ; CHECK-NEXT:     Type Server Index: 0
 ; CHECK-NEXT:     Has EC Info: 0
+; CHECK-NEXT:     0 Contributing Source Files:

Modified: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp?rev=267928&r1=267927&r2=267928&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp (original)
+++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp Thu Apr 28 15:05:18 2016
@@ -281,25 +281,30 @@ static void dumpStructure(RawSession &RS
          << Minor << "." << DbiStream.getPdbDllVersion() << '\n';
 
   outs() << "Modules: \n";
-  for (auto Modi : DbiStream.modules()) {
-    outs() << Modi.getModuleName() << '\n';
-    outs().indent(4) << "Debug Stream Index: " << Modi.getModuleStreamIndex()
-                     << '\n';
-    outs().indent(4) << "Object File: " << Modi.getObjFileName() << '\n';
-    outs().indent(4) << "Num Files: " << Modi.getNumberOfFiles() << '\n';
+  for (auto &Modi : DbiStream.modules()) {
+    outs() << Modi.Info.getModuleName() << '\n';
+    outs().indent(4) << "Debug Stream Index: "
+                     << Modi.Info.getModuleStreamIndex() << '\n';
+    outs().indent(4) << "Object File: " << Modi.Info.getObjFileName() << '\n';
+    outs().indent(4) << "Num Files: " << Modi.Info.getNumberOfFiles() << '\n';
     outs().indent(4) << "Source File Name Idx: "
-                     << Modi.getSourceFileNameIndex() << '\n';
-    outs().indent(4) << "Pdb File Name Idx: " << Modi.getPdbFilePathNameIndex()
-                     << '\n';
-    outs().indent(4) << "Line Info Byte Size: " << Modi.getLineInfoByteSize()
-                     << '\n';
+                     << Modi.Info.getSourceFileNameIndex() << '\n';
+    outs().indent(4) << "Pdb File Name Idx: "
+                     << Modi.Info.getPdbFilePathNameIndex() << '\n';
+    outs().indent(4) << "Line Info Byte Size: "
+                     << Modi.Info.getLineInfoByteSize() << '\n';
     outs().indent(4) << "C13 Line Info Byte Size: "
-                     << Modi.getC13LineInfoByteSize() << '\n';
+                     << Modi.Info.getC13LineInfoByteSize() << '\n';
     outs().indent(4) << "Symbol Byte Size: "
-                     << Modi.getSymbolDebugInfoByteSize() << '\n';
-    outs().indent(4) << "Type Server Index: " << Modi.getTypeServerIndex()
+                     << Modi.Info.getSymbolDebugInfoByteSize() << '\n';
+    outs().indent(4) << "Type Server Index: " << Modi.Info.getTypeServerIndex()
                      << '\n';
-    outs().indent(4) << "Has EC Info: " << Modi.hasECInfo() << '\n';
+    outs().indent(4) << "Has EC Info: " << Modi.Info.hasECInfo() << '\n';
+    outs().indent(4) << Modi.SourceFiles.size()
+                     << " Contributing Source Files: \n";
+    for (auto File : Modi.SourceFiles) {
+      outs().indent(8) << File << '\n';
+    }
   }
 }
 




More information about the llvm-commits mailing list