[llvm-commits] [llvm] r135608 - in /llvm/trunk/tools/llvm-objdump: MCFunction.cpp MCFunction.h llvm-objdump.cpp

Benjamin Kramer benny.kra at googlemail.com
Wed Jul 20 12:37:36 PDT 2011


Author: d0k
Date: Wed Jul 20 14:37:35 2011
New Revision: 135608

URL: http://llvm.org/viewvc/llvm-project?rev=135608&view=rev
Log:
Sketch out a CFG reconstruction mode for llvm-objdump.

- Not great yet, but it's a start.
- Requires an object file with a symbol table. (I really want to fix this, but it'll need a whole new algorithm)
- ELF and COFF won't work at the moment due to libObject shortcomings.

To try it out run
$ llvm-objdump -d --cfg foo.o

This will create a graphviz file for every symbol in the object file's text section containing a CFG.

Added:
    llvm/trunk/tools/llvm-objdump/MCFunction.cpp
    llvm/trunk/tools/llvm-objdump/MCFunction.h
Modified:
    llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp

Added: llvm/trunk/tools/llvm-objdump/MCFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.cpp?rev=135608&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-objdump/MCFunction.cpp (added)
+++ llvm/trunk/tools/llvm-objdump/MCFunction.cpp Wed Jul 20 14:37:35 2011
@@ -0,0 +1,141 @@
+//===-- MCFunction.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the algorithm to break down a region of machine code
+// into basic blocks and try to reconstruct a CFG from it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cassert>
+#include <set>
+#include <vector>
+using namespace llvm;
+
+/// hasPCRelTarget - Return true if the first operand of the branch Inst is a
+/// PC-relative immediate, i.e. the destination can be computed from the
+/// instruction alone. Also guards the OpInfo[0]/getOperand(0) accesses against
+/// branches that carry no explicit operands.
+static bool hasPCRelTarget(const MCInstrDesc &Desc, const MCInst &Inst) {
+  return Desc.getNumOperands() != 0 && Inst.getNumOperands() != 0 &&
+         Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL &&
+         Inst.getOperand(0).isImm();
+}
+
+/// createFunctionFromMC - Disassemble the bytes of Region in [Start, End) and
+/// build an MCFunction called Name from them. Block boundaries are placed at
+/// Start, after every branch and at every PC-relative branch target; the
+/// successors of a block are derived from its last instruction. Bytes that
+/// fail to decode are reported on errs() and skipped.
+///
+/// NOTE: successor edges are raw pointers into the block map of the returned
+/// object, so they are only valid as long as the result is not copied.
+MCFunction
+MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
+                                 const MemoryObject &Region, uint64_t Start,
+                                 uint64_t End, const MCInstrInfo *InstrInfo,
+                                 raw_ostream &DebugOut) {
+  std::set<uint64_t> Splits;
+  Splits.insert(Start);
+  std::vector<MCDecodedInst> Instructions;
+  // Start from a defined value in case a failed decode leaves Size untouched.
+  uint64_t Size = 0;
+
+  // Disassemble code and gather basic block split points.
+  for (uint64_t Index = Start; Index < End; Index += Size) {
+    MCInst Inst;
+
+    if (!DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut)) {
+      errs() << "warning: invalid instruction encoding\n";
+      if (Size == 0)
+        Size = 1; // skip illegible bytes
+      continue;
+    }
+
+    const MCInstrDesc &Desc = InstrInfo->get(Inst.getOpcode());
+    if (Desc.isBranch()) {
+      if (hasPCRelTarget(Desc, Inst)) {
+        int64_t Imm = Inst.getOperand(0).getImm();
+        // FIXME: Distinguish relocations from nop jumps.
+        if (Imm != 0) {
+          assert(Index+Imm+Size < End && "Branch out of function.");
+          Splits.insert(Index+Imm+Size);
+        }
+      }
+      // The instruction following a branch starts a new block. Don't create
+      // an empty block at End when the branch is the last instruction.
+      if (Index+Size < End)
+        Splits.insert(Index+Size);
+    }
+
+    Instructions.push_back(MCDecodedInst(Index, Size, Inst));
+  }
+
+  MCFunction f(Name);
+
+  // Create basic blocks. Splits is sorted, so each block spans from its split
+  // point up to the next one (or End for the last block).
+  unsigned ii = 0, ie = Instructions.size();
+  for (std::set<uint64_t>::iterator spi = Splits.begin(),
+       spe = Splits.end(); spi != spe; ++spi) {
+    MCBasicBlock BB;
+    uint64_t BlockEnd = llvm::next(spi) == spe ? End : *llvm::next(spi);
+    // Add instructions to the BB.
+    for (; ii != ie; ++ii) {
+      if (Instructions[ii].Address < *spi ||
+          Instructions[ii].Address >= BlockEnd)
+        break;
+      BB.addInst(Instructions[ii]);
+    }
+    f.addBlock(*spi, BB);
+  }
+
+  // Calculate successors of each block.
+  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
+    MCBasicBlock &BB = i->second;
+    if (BB.getInsts().empty()) continue;
+    const MCDecodedInst &Inst = BB.getInsts().back();
+    const MCInstrDesc &Desc = InstrInfo->get(Inst.Inst.getOpcode());
+    // Qualified to avoid an ambiguity with C++0x std::next found through ADL.
+    MCFunction::iterator Next = llvm::next(i);
+
+    if (!Desc.isBranch()) {
+      // No branch. Fall through to the next block.
+      if (!Desc.isReturn() && Next != e)
+        BB.addSucc(&Next->second);
+      continue;
+    }
+
+    if (hasPCRelTarget(Desc, Inst.Inst)) {
+      // PCRel branch, we know the destination.
+      int64_t Imm = Inst.Inst.getOperand(0).getImm();
+      if (Imm != 0)
+        BB.addSucc(&f.getBlockAtAddress(Inst.Address+Inst.Size+Imm));
+      // Conditional branches can also fall through to the next block.
+      if (Desc.isConditionalBranch() && Next != e)
+        BB.addSucc(&Next->second);
+    } else {
+      // Indirect branch. Bail and add all blocks of the function as a
+      // successor.
+      for (MCFunction::iterator bi = f.begin(), be = f.end(); bi != be; ++bi)
+        BB.addSucc(&bi->second);
+    }
+  }
+
+  return f;
+}

Added: llvm/trunk/tools/llvm-objdump/MCFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.h?rev=135608&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-objdump/MCFunction.h (added)
+++ llvm/trunk/tools/llvm-objdump/MCFunction.h Wed Jul 20 14:37:35 2011
@@ -0,0 +1,108 @@
+//===-- MCFunction.h ------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the data structures to hold a CFG reconstructed from
+// machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJDUMP_MCFUNCTION_H
+#define LLVM_OBJDUMP_MCFUNCTION_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include <cassert>
+#include <map>
+
+namespace llvm {
+
+class MCDisassembler;
+class MCInstrInfo;
+class MemoryObject;
+class raw_ostream;
+
+/// MCDecodedInst - Small container to hold an MCInst and associated info like
+/// address and size.
+struct MCDecodedInst {
+  uint64_t Address; // Address the instruction was decoded at.
+  uint64_t Size;    // Size of the encoded instruction.
+  MCInst Inst;
+
+  MCDecodedInst(uint64_t Address, uint64_t Size, const MCInst &Inst)
+    : Address(Address), Size(Size), Inst(Inst) {}
+};
+
+/// MCBasicBlock - Consists of multiple MCDecodedInsts and a set of successor
+/// MCBasicBlocks. Successors are stored as non-owning pointers.
+class MCBasicBlock {
+  SmallVector<MCDecodedInst, 8> Insts;
+  typedef SmallPtrSet<MCBasicBlock*, 8> SetTy;
+  SetTy Succs;
+public:
+  /// getInsts - The instructions of this block, in the order they were added.
+  ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
+
+  typedef SetTy::const_iterator succ_iterator;
+  succ_iterator succ_begin() const { return Succs.begin(); }
+  succ_iterator succ_end() const { return Succs.end(); }
+
+  void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); }
+  /// addSucc - Record BB as a successor; adding the same block twice has no
+  /// further effect because successors are kept in a set.
+  void addSucc(MCBasicBlock *BB) { Succs.insert(BB); }
+};
+
+/// MCFunction - Represents a named function in machine code, containing
+/// multiple MCBasicBlocks.
+///
+/// NOTE: Name is a StringRef, so the caller must keep the underlying string
+/// alive. Blocks live inside the map and successor edges point at those map
+/// entries, so copying an MCFunction leaves the copy's edges pointing into
+/// the original.
+class MCFunction {
+  const StringRef Name;
+  // Keep BBs sorted by address.
+  typedef std::map<uint64_t, MCBasicBlock> MapTy;
+  MapTy Blocks;
+public:
+  MCFunction(StringRef Name) : Name(Name) {}
+
+  // Create an MCFunction from a region of binary machine code.
+  static MCFunction
+  createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
+                       const MemoryObject &Region, uint64_t Start, uint64_t End,
+                       const MCInstrInfo *InstrInfo, raw_ostream &DebugOut);
+
+  typedef MapTy::iterator iterator;
+  iterator begin() { return Blocks.begin(); }
+  iterator end() { return Blocks.end(); }
+
+  StringRef getName() const { return Name; }
+
+  /// addBlock - Store a copy of BB as the block starting at Address and return
+  /// a reference to the stored copy. Address must not be in use yet.
+  MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
+    assert(!Blocks.count(Address) && "Already a BB at address.");
+    return Blocks[Address] = BB;
+  }
+
+  /// getBlockAtAddress - Return the block starting at Address, which must
+  /// exist.
+  MCBasicBlock &getBlockAtAddress(uint64_t Address) {
+    assert(Blocks.count(Address) && "No BB at address.");
+    return Blocks[Address];
+  }
+};
+
+} // end namespace llvm
+
+#endif

Modified: llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp?rev=135608&r1=135607&r2=135608&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp (original)
+++ llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp Wed Jul 20 14:37:35 2011
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCFunction.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Triple.h"
@@ -21,6 +22,8 @@
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
@@ -52,6 +55,10 @@
   Disassembled("d", cl::desc("Alias for --disassemble"),
                cl::aliasopt(Disassemble));
 
+  cl::opt<bool>
+  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and "
+                      "write it to a graphviz file"));
+
   cl::opt<std::string>
   TripleName("triple", cl::desc("Target triple to disassemble for, "
                                 "see -version for available targets"));
@@ -156,6 +163,7 @@
     // GetTarget prints out stuff.
     return;
   }
+  const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
 
   outs() << '\n';
   outs() << Filename
@@ -233,15 +241,14 @@
       uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
       outs() << '\n' << Symbols[si].second << ":\n";
 
-      for (Index = Start; Index < End; Index += Size) {
-        MCInst Inst;
-
 #ifndef NDEBUG
         raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
 #else
         raw_ostream &DebugOut = nulls();
 #endif
 
+      for (Index = Start; Index < End; Index += Size) {
+        MCInst Inst;
         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
           uint64_t addr;
           if (error(i->getAddress(addr))) break;
@@ -255,6 +262,36 @@
             Size = 1; // skip illegible bytes
         }
       }
+
+      if (CFG) {
+        MCFunction f =
+          MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(),
+                                           memoryObject, Start, End, InstrInfo,
+                                           DebugOut);
+
+        // Start a new dot file.
+        std::string Error;
+        raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
+
+        Out << "digraph " << f.getName() << " {\n";
+        Out << "graph [ rankdir = \"LR\" ];\n";
+        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
+          Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>";
+          // Print instructions.
+          for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
+               ++ii) {
+            IP->printInst(&i->second.getInsts()[ii].Inst, Out);
+            Out << '|';
+          }
+          Out << "<o>\" shape=\"record\" ];\n";
+
+          // Add edges.
+          for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
+              se = i->second.succ_end(); si != se; ++si)
+            Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n";
+        }
+        Out << "}\n";
+      }
     }
   }
 }
@@ -271,6 +308,7 @@
   llvm::InitializeAllTargets();
   llvm::InitializeAllMCAsmInfos();
   llvm::InitializeAllMCCodeGenInfos();
+  llvm::InitializeAllMCInstrInfos();
   llvm::InitializeAllAsmPrinters();
   llvm::InitializeAllAsmParsers();
   llvm::InitializeAllDisassemblers();





More information about the llvm-commits mailing list