[llvm] r277388 - [msf] Teach LLVM to parse a split Fpm.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 1 14:19:46 PDT 2016


Author: zturner
Date: Mon Aug  1 16:19:45 2016
New Revision: 277388

URL: http://llvm.org/viewvc/llvm-project?rev=277388&view=rev
Log:
[msf] Teach LLVM to parse a split Fpm.

The FPM is split at regular intervals across the MSF file, as the MS code
suggests. It turns out that the value of the interval is precisely the
block size. If the block size is 4096, then there are two Fpm pages every
4096 blocks.

So here we teach the PDBFile class to parse a split FPM, and also add more
options when dumping the FPM to display some additional information, such
as orphaned pages (pages which the FPM says are allocated, but which
nothing appears to use), use-after-free pages (pages which the FPM says
are not allocated, but which are referenced by a stream), and multiply-used
pages (pages which the FPM says are allocated but which are referenced more
than once).

Reviewed By: ruiu
Differential Revision: https://reviews.llvm.org/D23022

Modified:
    llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h
    llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp
    llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
    llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp
    llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h
    llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp
    llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h

Modified: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/PDBFile.h Mon Aug  1 16:19:45 2016
@@ -67,6 +67,8 @@ public:
   Error setBlockData(uint32_t BlockIndex, uint32_t Offset,
                      ArrayRef<uint8_t> Data) const override;
 
+  ArrayRef<uint32_t> getFpmPages() const { return FpmPages; }
+
   ArrayRef<support::ulittle32_t> getStreamSizes() const {
     return ContainerLayout.StreamSizes;
   }
@@ -95,6 +97,7 @@ private:
 
   std::unique_ptr<msf::ReadableStream> Buffer;
 
+  std::vector<uint32_t> FpmPages;
   msf::MSFLayout ContainerLayout;
 
   std::unique_ptr<InfoStream> Info;

Modified: llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp (original)
+++ llvm/trunk/lib/DebugInfo/PDB/Raw/PDBFile.cpp Mon Aug  1 16:19:45 2016
@@ -121,14 +121,41 @@ Error PDBFile::parseFileHeaders() {
   ContainerLayout.SB = SB;
 
   // Initialize Free Page Map.
-  ContainerLayout.FreePageMap.resize(getBlockSize() * 8);
-  uint64_t FPMOffset = SB->FreeBlockMapBlock * getBlockSize();
-  ArrayRef<uint8_t> FPMBlock;
-  if (auto EC = Buffer->readBytes(FPMOffset, getBlockSize(), FPMBlock))
-    return EC;
-  for (uint32_t I = 0, E = getBlockSize() * 8; I != E; ++I)
-    if (FPMBlock[I / 8] & (1 << (I % 8)))
-      ContainerLayout.FreePageMap[I] = true;
+  ContainerLayout.FreePageMap.resize(SB->NumBlocks);
+  ArrayRef<uint8_t> FpmBytes;
+  // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
+  // allows for a maximum of getBlockSize() * 8 bits in the Fpm, and
+  // thus an equal number of total blocks in the file.  For a block size
+  // of 4KiB (very common), this would yield 32Ki total blocks in file, for a
+  // maximum file size of 32Ki * 4KiB = 128MiB.  Obviously this won't do, so
+  // the Fpm is split across the file at `getBlockSize()` intervals.  As a
+  // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
+  // for any non-negative integer k is an Fpm block.  In theory, we only really
+  // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
+  // current versions of the MSF format already expect the Fpm to be arranged
+  // at getBlockSize() intervals, so we have to be compatible.
+  // See the function fpmPn() for more information:
+  // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
+
+  uint32_t BlocksPerSection = getBlockSize();
+  uint64_t FpmBlockOffset = SB->FreeBlockMapBlock;
+  uint32_t BlocksRemaining = getBlockCount();
+  for (uint32_t SI = 0; BlocksRemaining > 0; ++SI) {
+    uint32_t FpmFileOffset = FpmBlockOffset * getBlockSize();
+
+    if (auto EC = Buffer->readBytes(FpmFileOffset, getBlockSize(), FpmBytes))
+      return EC;
+
+    uint32_t BlocksThisSection = std::min(BlocksRemaining, BlocksPerSection);
+    for (uint32_t I = 0; I < BlocksThisSection; ++I) {
+      uint32_t BI = I + BlocksPerSection * SI;
+
+      if (FpmBytes[I / 8] & (1 << (I % 8)))
+        ContainerLayout.FreePageMap[BI] = true;
+    }
+    BlocksRemaining -= BlocksThisSection;
+    FpmBlockOffset += BlocksPerSection;
+  }
 
   Reader.setOffset(getBlockMapOffset());
   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,

Modified: llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test (original)
+++ llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test Mon Aug  1 16:19:45 2016
@@ -2,7 +2,7 @@
 ; RUN:              -sym-record-bytes -publics -module-files -stream-name=/names \
 ; RUN:              -stream-summary -stream-blocks -ipi-records -ipi-record-bytes \
 ; RUN:              -section-contribs -section-map -section-headers -line-info \
-; RUN:              -tpi-hash -fpo -fpm %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
+; RUN:              -tpi-hash -fpo -page-stats %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
 ; RUN: llvm-pdbdump raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
 ; RUN: llvm-pdbdump raw -headers -stream-name=/names -modules -module-files \
 ; RUN:              %p/Inputs/big-read.pdb | FileCheck -check-prefix=BIG %s
@@ -38,7 +38,10 @@
 ; EMPTY-NEXT:   Stream 15: [TPI Hash] (308 bytes)
 ; EMPTY-NEXT:   Stream 16: [IPI Hash] (68 bytes)
 ; EMPTY-NEXT: ]
-; EMPTY-NEXT: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+; EMPTY-NEXT: Msf Free Pages: [3, 4, 5, 8, 9]
+; EMPTY-NEXT: Orphaned Pages: []
+; EMPTY-NEXT: Multiply Used Pages: []
+; EMPTY-NEXT: Use After Free Pages: [8]
 ; EMPTY-NEXT: StreamBlocks [
 ; EMPTY-NEXT:   Stream 0: [8]
 ; EMPTY-NEXT:   Stream 1: [19]
@@ -974,7 +977,10 @@
 ; ALL:   Stream 15: [TPI Hash] (308 bytes)
 ; ALL:   Stream 16: [IPI Hash] (68 bytes)
 ; ALL: ]
-; ALL: Used Page Map: [0, 1, 2, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+; ALL: Msf Free Pages: [3, 4, 5, 8, 9]
+; ALL: Orphaned Pages: []
+; ALL: Multiply Used Pages: []
+; ALL: Use After Free Pages: [8]
 ; ALL: StreamBlocks [
 ; ALL:   Stream 0: [8]
 ; ALL:   Stream 1: [19]

Modified: llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp (original)
+++ llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.cpp Mon Aug  1 16:19:45 2016
@@ -35,6 +35,48 @@ using namespace llvm::codeview;
 using namespace llvm::msf;
 using namespace llvm::pdb;
 
+namespace {
+struct PageStats {
+  explicit PageStats(const BitVector &FreePages)
+      : Upm(FreePages), ActualUsedPages(FreePages.size()),
+        MultiUsePages(FreePages.size()), UseAfterFreePages(FreePages.size()) {
+    const_cast<BitVector &>(Upm).flip();
+    // To calculate orphaned pages, we start with the set of pages that the
+    // MSF thinks are used.  Each time we find one that actually *is* used,
+    // we unset it.  Whichever bits remain set at the end are orphaned.
+    OrphanedPages = Upm;
+  }
+
+  // The inverse of the MSF File's copy of the Fpm.  The basis for which we
+  // determine the allocation status of each page.
+  const BitVector Upm;
+
+  // Pages which are marked as used in the FPM and are used at least once.
+  BitVector ActualUsedPages;
+
+  // Pages which are marked as used in the FPM but are used more than once.
+  BitVector MultiUsePages;
+
+  // Pages which are marked as used in the FPM but are not used at all.
+  BitVector OrphanedPages;
+
+  // Pages which are marked free in the FPM but are used.
+  BitVector UseAfterFreePages;
+};
+}
+
+static void recordKnownUsedPage(PageStats &Stats, uint32_t UsedIndex) {
+  if (Stats.Upm.test(UsedIndex)) {
+    if (Stats.ActualUsedPages.test(UsedIndex))
+      Stats.MultiUsePages.set(UsedIndex);
+    Stats.ActualUsedPages.set(UsedIndex);
+    Stats.OrphanedPages.reset(UsedIndex);
+  } else {
+    // The MSF doesn't think this page is used, but it is.
+    Stats.UseAfterFreePages.set(UsedIndex);
+  }
+}
+
 static void printSectionOffset(llvm::raw_ostream &OS,
                                const SectionOffset &Off) {
   OS << Off.Off << ", " << Off.Isect;
@@ -238,21 +280,53 @@ Error LLVMOutputStyle::dumpStreamSummary
 }
 
 Error LLVMOutputStyle::dumpFreePageMap() {
-  if (!opts::raw::DumpFreePageMap)
+  if (!opts::raw::DumpPageStats)
     return Error::success();
-  const BitVector &FPM = File.getMsfLayout().FreePageMap;
-
-  std::vector<uint32_t> Vec;
-  for (uint32_t I = 0, E = FPM.size(); I != E; ++I)
-    if (!FPM[I])
-      Vec.push_back(I);
 
-  // Prints out used pages instead of free pages because
+  // Start with used pages instead of free pages because
   // the number of free pages is far larger than used pages.
-  P.printList("Used Page Map", Vec);
+  BitVector FPM = File.getMsfLayout().FreePageMap;
+
+  PageStats PS(FPM);
+
+  recordKnownUsedPage(PS, 0); // MSF Super Block
+
+  uint32_t BlocksPerSection = File.getBlockSize();
+  uint32_t NumSections =
+      llvm::alignTo(File.getBlockCount(), BlocksPerSection) / BlocksPerSection;
+  for (uint32_t I = 0; I < NumSections; ++I) {
+    uint32_t Fpm0 = 1 + BlocksPerSection * I;
+    // 2 Fpm blocks spaced at `getBlockSize()` block intervals
+    recordKnownUsedPage(PS, Fpm0);
+    recordKnownUsedPage(PS, Fpm0 + 1);
+  }
+
+  recordKnownUsedPage(PS, File.getBlockMapIndex()); // Stream Table
+
+  for (auto DB : File.getDirectoryBlockArray()) {
+    recordKnownUsedPage(PS, DB);
+  }
+  for (auto &SE : File.getStreamMap()) {
+    for (auto &S : SE) {
+      recordKnownUsedPage(PS, S);
+    }
+  }
+
+  dumpBitVector("Msf Free Pages", FPM);
+  dumpBitVector("Orphaned Pages", PS.OrphanedPages);
+  dumpBitVector("Multiply Used Pages", PS.MultiUsePages);
+  dumpBitVector("Use After Free Pages", PS.UseAfterFreePages);
   return Error::success();
 }
 
+void LLVMOutputStyle::dumpBitVector(StringRef Name, const BitVector &V) {
+  std::vector<uint32_t> Vec;
+  for (uint32_t I = 0, E = V.size(); I != E; ++I)
+    if (V[I])
+      Vec.push_back(I);
+  P.printList(Name, Vec);
+}
+
 Error LLVMOutputStyle::dumpStreamBlocks() {
   if (!opts::raw::DumpStreamBlocks)
     return Error::success();

Modified: llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h (original)
+++ llvm/trunk/tools/llvm-pdbdump/LLVMOutputStyle.h Mon Aug  1 16:19:45 2016
@@ -16,6 +16,7 @@
 #include "llvm/Support/ScopedPrinter.h"
 
 namespace llvm {
+class BitVector;
 namespace pdb {
 class LLVMOutputStyle : public OutputStyle {
 public:
@@ -39,6 +40,8 @@ private:
   Error dumpSectionHeaders();
   Error dumpFpoStream();
 
+  void dumpBitVector(StringRef Name, const BitVector &V);
+
   void flush();
 
   PDBFile &File;

Modified: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp (original)
+++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp Mon Aug  1 16:19:45 2016
@@ -167,8 +167,10 @@ cl::opt<bool> DumpStreamBlocks("stream-b
 cl::opt<bool> DumpStreamSummary("stream-summary",
                                 cl::desc("dump summary of the PDB streams"),
                                 cl::cat(MsfOptions), cl::sub(RawSubcommand));
-cl::opt<bool> DumpFreePageMap("fpm", cl::desc("dump free page bitmap"),
-                              cl::cat(MsfOptions), cl::sub(RawSubcommand));
+cl::opt<bool> DumpPageStats(
+    "page-stats",
+    cl::desc("dump allocation stats of the pages in the MSF file"),
+    cl::cat(MsfOptions), cl::sub(RawSubcommand));
 
 // TYPE OPTIONS
 cl::opt<bool>
@@ -544,7 +546,7 @@ int main(int argc_, const char *argv_[])
     opts::raw::DumpPublics = true;
     opts::raw::DumpSectionHeaders = true;
     opts::raw::DumpStreamSummary = true;
-    opts::raw::DumpFreePageMap = true;
+    opts::raw::DumpPageStats = true;
     opts::raw::DumpStreamBlocks = true;
     opts::raw::DumpTpiRecords = true;
     opts::raw::DumpTpiHash = true;

Modified: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h?rev=277388&r1=277387&r2=277388&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h (original)
+++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.h Mon Aug  1 16:19:45 2016
@@ -37,7 +37,7 @@ namespace raw {
 extern llvm::cl::opt<bool> DumpHeaders;
 extern llvm::cl::opt<bool> DumpStreamBlocks;
 extern llvm::cl::opt<bool> DumpStreamSummary;
-extern llvm::cl::opt<bool> DumpFreePageMap;
+extern llvm::cl::opt<bool> DumpPageStats;
 extern llvm::cl::opt<bool> DumpTpiHash;
 extern llvm::cl::opt<bool> DumpTpiRecordBytes;
 extern llvm::cl::opt<bool> DumpTpiRecords;




More information about the llvm-commits mailing list