[llvm] r293795 - [pdb] Add a new command for analyzing hash collisions.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 1 10:30:23 PST 2017


Author: zturner
Date: Wed Feb  1 12:30:22 2017
New Revision: 293795

URL: http://llvm.org/viewvc/llvm-project?rev=293795&view=rev
Log:
[pdb] Add a new command for analyzing hash collisions.

This introduces the `analyze` subcommand.  For now there is only
one option, to analyze hash collisions in the type streams.  In
the future, however, we could add many more things here, such
as size analyses, compaction, and statistics about the types of
records.

Added:
    llvm/trunk/tools/llvm-pdbdump/Analyze.cpp
    llvm/trunk/tools/llvm-pdbdump/Analyze.h
Modified:
    llvm/trunk/include/llvm/DebugInfo/CodeView/TypeDatabase.h
    llvm/trunk/lib/DebugInfo/CodeView/TypeDatabase.cpp
    llvm/trunk/tools/llvm-pdbdump/CMakeLists.txt
    llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeDatabase.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/CodeView/TypeDatabase.h?rev=293795&r1=293794&r2=293795&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeDatabase.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeDatabase.h Wed Feb  1 12:30:22 2017
@@ -34,6 +34,8 @@ public:
 
   StringRef getTypeName(TypeIndex Index) const;
 
+  const CVType &getTypeRecord(TypeIndex Index) const; // Accessor for the type record associated with Index.
+
   bool containsTypeIndex(TypeIndex Index) const;
 
   uint32_t size() const;

Modified: llvm/trunk/lib/DebugInfo/CodeView/TypeDatabase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/CodeView/TypeDatabase.cpp?rev=293795&r1=293794&r2=293795&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/CodeView/TypeDatabase.cpp (original)
+++ llvm/trunk/lib/DebugInfo/CodeView/TypeDatabase.cpp Wed Feb  1 12:30:22 2017
@@ -106,6 +106,10 @@ StringRef TypeDatabase::getTypeName(Type
   return "<unknown UDT>";
 }
 
+const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const { // Returns a reference to the record stored for Index.
+  return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex]; // Slot is the index's offset from FirstNonSimpleIndex; NOTE(review): no bounds check is performed here.
+}
+
 bool TypeDatabase::containsTypeIndex(TypeIndex Index) const {
   uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex;
   return I < CVUDTNames.size();

Added: llvm/trunk/tools/llvm-pdbdump/Analyze.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/Analyze.cpp?rev=293795&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/Analyze.cpp (added)
+++ llvm/trunk/tools/llvm-pdbdump/Analyze.cpp Wed Feb  1 12:30:22 2017
@@ -0,0 +1,164 @@
+//===- Analyze.cpp - PDB analysis functions ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Analyze.h"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <list>
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+static StringRef getLeafTypeName(TypeLeafKind LT) { // Maps a leaf kind to a name string used in the report output.
+  switch (LT) { // Cases come from TYPE_RECORD expansions in TypeRecords.def; each returns the stringized third macro argument.
+#define TYPE_RECORD(ename, value, name)                                        \
+  case ename:                                                                  \
+    return #name;
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+  default:
+    break;
+  }
+  return "UnknownLeaf"; // Fallback for any kind without a generated case.
+}
+
+namespace {
+struct HashLookupVisitor : public TypeVisitorCallbacks { // Visitor callback that buckets each visited type record by its hash value from the TPI stream.
+  struct Entry { // One visited record paired with the type index assigned to it.
+    TypeIndex TI;
+    CVType Record;
+  };
+
+  explicit HashLookupVisitor(TpiStream &Tpi) : Tpi(Tpi) {}
+
+  Error visitTypeBegin(CVType &Record) override { // Runs once per visited record; always returns success.
+    uint32_t H = Tpi.getHashValues()[I]; // Hash for the I-th visited record; NOTE(review): I is not checked against the hash array's size.
+    Record.Hash = H; // Writes the hash into the caller's record (Record is a non-const reference).
+    TypeIndex TI(I + TypeIndex::FirstNonSimpleIndex); // Type index derived from visit order.
+    Lookup[H].push_back(Entry{TI, Record}); // Stores a copy of the record in the bucket for H.
+    ++I;
+    return Error::success();
+  }
+
+  uint32_t I = 0; // Count of records visited so far.
+  DenseMap<uint32_t, std::list<Entry>> Lookup; // Hash value -> every visited record that has that hash.
+  TpiStream &Tpi; // Source of the per-record hash values.
+};
+}
+
+AnalysisStyle::AnalysisStyle(PDBFile &File) : File(File) {} // Stores a reference to File; does no other work.
+
+Error AnalysisStyle::dump() { // Writes a hash-collision and hash-adjuster report for the TPI stream to outs().
+  auto Tpi = File.getPDBTpiStream();
+  if (!Tpi)
+    return Tpi.takeError(); // Propagate the failure to obtain the TPI stream.
+
+  TypeDatabase TypeDB;
+  TypeDatabaseVisitor DBV(TypeDB);
+  TypeDeserializer Deserializer;
+  TypeVisitorCallbackPipeline Pipeline;
+  HashLookupVisitor Hasher(*Tpi);
+  // Deserialize the types
+  Pipeline.addCallbackToPipeline(Deserializer);
+  // Add them to the database
+  Pipeline.addCallbackToPipeline(DBV);
+  // Store their hash values
+  Pipeline.addCallbackToPipeline(Hasher);
+
+  CVTypeVisitor Visitor(Pipeline);
+
+  bool Error = false; // NOTE(review): this local shares its name with the Error return type; a name such as HadError would read more clearly.
+  for (auto Item : Tpi->types(&Error)) { // NOTE(review): Item is taken by value on each iteration.
+    if (auto EC = Visitor.visitTypeRecord(Item))
+      return EC; // Stop at the first record the pipeline rejects.
+  }
+  if (Error) // Flag handed to types() above; reported as a corrupt file when set.
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "TPI stream contained corrupt record");
+
+  auto &Adjusters = Tpi->getHashAdjusters();
+  DenseSet<uint32_t> AdjusterSet; // Holds the .second value of every hash adjuster.
+  for (const auto &Adj : Adjusters) {
+    assert(AdjusterSet.find(Adj.second) == AdjusterSet.end()); // Expects each value to occur only once.
+    AdjusterSet.insert(Adj.second);
+  }
+
+  uint32_t Count = 0; // Number of hash values shared by more than one record.
+  outs() << "Searching for hash collisions\n";
+  for (const auto &H : Hasher.Lookup) {
+    if (H.second.size() <= 1) // A bucket with a single record is not a collision.
+      continue;
+    ++Count;
+    outs() << formatv("Hash: {0}, Count: {1} records\n", H.first,
+                      H.second.size());
+    for (const auto &R : H.second) {
+      auto Iter = AdjusterSet.find(R.TI.getIndex());
+      StringRef Prefix; // Stays empty unless this record's index is in AdjusterSet.
+      if (Iter != AdjusterSet.end()) {
+        Prefix = "[HEAD]";
+        AdjusterSet.erase(Iter); // Whatever remains in the set after this loop is reported as orphaned below.
+      }
+      StringRef LeafName = getLeafTypeName(R.Record.Type);
+      uint32_t TI = R.TI.getIndex();
+      StringRef TypeName = TypeDB.getTypeName(R.TI);
+      outs() << formatv("{0,-6} {1} ({2:x}) {3}\n", Prefix, LeafName, TI,
+                        TypeName);
+    }
+  }
+
+  outs() << "\n";
+  outs() << "Dumping hash adjustment chains\n";
+  for (const auto &A : Tpi->getHashAdjusters()) { // One chain per adjuster: the head record, then the other records with the same hash.
+    TypeIndex TI(A.second);
+    StringRef TypeName = TypeDB.getTypeName(TI);
+    const CVType &HeadRecord = TypeDB.getTypeRecord(TI); // NOTE(review): TI is not validated (e.g. via containsTypeIndex) before this lookup.
+    assert(HeadRecord.Hash.hasValue()); // NOTE(review): checked only by this assert; the dereference below is otherwise unguarded.
+
+    auto CollisionsIter = Hasher.Lookup.find(*HeadRecord.Hash);
+    if (CollisionsIter == Hasher.Lookup.end())
+      continue; // No bucket recorded for this hash; nothing to print.
+
+    const auto &Collisions = CollisionsIter->second;
+    outs() << TypeName << "\n";
+    outs() << formatv("    [HEAD] {0:x} {1} {2}\n", A.second,
+                      getLeafTypeName(HeadRecord.Type), TypeName);
+    for (const auto &Chain : Collisions) {
+      if (Chain.TI == TI) // The head was already printed above.
+        continue;
+      const CVType &TailRecord = TypeDB.getTypeRecord(Chain.TI);
+      outs() << formatv("           {0:x} {1} {2}\n", Chain.TI.getIndex(),
+                        getLeafTypeName(TailRecord.Type),
+                        TypeDB.getTypeName(Chain.TI));
+    }
+  }
+  outs() << formatv("There are {0} orphaned hash adjusters\n", // Entries never erased by the collision loop above.
+                    AdjusterSet.size());
+  for (const auto &Adj : AdjusterSet) {
+    outs() << formatv("    {0}\n", Adj);
+  }
+
+  uint32_t DistinctHashValues = Hasher.Lookup.size();
+  outs() << formatv("{0}/{1} hash collisions", Count, DistinctHashValues); // NOTE(review): no trailing newline is written after this summary.
+  return Error::success();
+}

Added: llvm/trunk/tools/llvm-pdbdump/Analyze.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/Analyze.h?rev=293795&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/Analyze.h (added)
+++ llvm/trunk/tools/llvm-pdbdump/Analyze.h Wed Feb  1 12:30:22 2017
@@ -0,0 +1,30 @@
+//===- Analyze.h - PDB analysis functions -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
+#define LLVM_TOOLS_LLVMPDBDUMP_ANALYSIS_H
+
+#include "OutputStyle.h"
+
+namespace llvm {
+namespace pdb {
+class PDBFile;
+class AnalysisStyle : public OutputStyle { // OutputStyle implementation bound to a single PDBFile.
+public:
+  explicit AnalysisStyle(PDBFile &File);
+
+  Error dump() override; // Overrides OutputStyle::dump.
+
+private:
+  PDBFile &File; // Held by reference, not owned by this object.
+};
+}
+}
+
+#endif

Modified: llvm/trunk/tools/llvm-pdbdump/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/CMakeLists.txt?rev=293795&r1=293794&r2=293795&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/CMakeLists.txt (original)
+++ llvm/trunk/tools/llvm-pdbdump/CMakeLists.txt Wed Feb  1 12:30:22 2017
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_llvm_tool(llvm-pdbdump
+  Analyze.cpp
   CompactTypeDumpVisitor.cpp
   llvm-pdbdump.cpp
   YamlSymbolDumper.cpp

Modified: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp?rev=293795&r1=293794&r2=293795&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp (original)
+++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp Wed Feb  1 12:30:22 2017
@@ -14,6 +14,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-pdbdump.h"
+
+#include "Analyze.h"
 #include "LLVMOutputStyle.h"
 #include "LinePrinter.h"
 #include "OutputStyle.h"
@@ -86,6 +88,10 @@ cl::SubCommand
     PdbToYamlSubcommand("pdb2yaml",
                         "Generate a detailed YAML description of a PDB File");
 
+cl::SubCommand
+    AnalyzeSubcommand("analyze",
+                      "Analyze various aspects of a PDB's structure"); // Declares the `analyze` subcommand and its description string.
+
 cl::OptionCategory TypeCategory("Symbol Type Options");
 cl::OptionCategory FilterCategory("Filtering Options");
 cl::OptionCategory OtherOptions("Other Options");
@@ -318,6 +324,14 @@ cl::list<std::string> InputFilename(cl::
                                     cl::desc("<input PDB file>"), cl::Required,
                                     cl::sub(PdbToYamlSubcommand));
 }
+
+namespace analyze { // Command-line options attached to AnalyzeSubcommand.
+cl::opt<bool> StringTable("hash-collisions", cl::desc("Find hash collisions"), // NOTE(review): the variable is named StringTable although the flag is hash-collisions -- possibly a copy-paste leftover.
+                          cl::sub(AnalyzeSubcommand), cl::init(false));
+cl::list<std::string> InputFilename(cl::Positional, // Positional input path(s), marked cl::Required.
+                                    cl::desc("<input PDB file>"), cl::Required,
+                                    cl::sub(AnalyzeSubcommand));
+}
 }
 
 static ExitOnError ExitOnErr;
@@ -423,6 +437,17 @@ static void dumpRaw(StringRef Path) {
   ExitOnErr(O->dump());
 }
 
+static void dumpAnalysis(StringRef Path) { // Loads Path with the native reader and runs AnalysisStyle::dump on it.
+  std::unique_ptr<IPDBSession> Session;
+  ExitOnErr(loadDataForPDB(PDB_ReaderType::Native, Path, Session));
+
+  NativeSession *NS = static_cast<NativeSession *>(Session.get()); // Unchecked downcast; relies on the Native reader type requested above.
+  PDBFile &File = NS->getPDBFile();
+  auto O = llvm::make_unique<AnalysisStyle>(File);
+
+  ExitOnErr(O->dump()); // NOTE(review): this function does not consult the hash-collisions option; the analysis runs whenever it is called.
+}
+
 static void dumpPretty(StringRef Path) {
   std::unique_ptr<IPDBSession> Session;
 
@@ -608,6 +633,8 @@ int main(int argc_, const char *argv_[])
     pdb2Yaml(opts::pdb2yaml::InputFilename.front());
   } else if (opts::YamlToPdbSubcommand) {
     yamlToPdb(opts::yaml2pdb::InputFilename.front());
+  } else if (opts::AnalyzeSubcommand) {
+    dumpAnalysis(opts::analyze::InputFilename.front());
   } else if (opts::PrettySubcommand) {
     if (opts::pretty::Lines)
       opts::pretty::Compilands = true;




More information about the llvm-commits mailing list