[llvm-commits] [llvm] r140028 - in /llvm/trunk/tools/llvm-objdump: CMakeLists.txt MCFunction.cpp MCFunction.h MachODump.cpp llvm-objdump.cpp llvm-objdump.h

Evan Cheng evan.cheng at apple.com
Mon Sep 19 22:21:14 PDT 2011


Hi Benjamin,

Can you refactor the code a bit and add some comments? For example, the loop in createFunctionFromMC needs to be factored out. DisassembleInputMachO is also crying out for both refactoring and comments.

Thanks,

Evan

On Sep 19, 2011, at 10:56 AM, Benjamin Kramer <benny.kra at googlemail.com> wrote:

> Author: d0k
> Date: Mon Sep 19 12:56:04 2011
> New Revision: 140028
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=140028&view=rev
> Log:
> Add a MachO-specific "mode" to llvm-objdump, that, if enabled, gathers additional information that is only available on MachO.
> 
> - It can take FunctionStarts from a binary to find entry points more accurately.
> - Symbol offsets in executables are correct now.
> 
> Added:
>    llvm/trunk/tools/llvm-objdump/MachODump.cpp
>    llvm/trunk/tools/llvm-objdump/llvm-objdump.h
> Modified:
>    llvm/trunk/tools/llvm-objdump/CMakeLists.txt
>    llvm/trunk/tools/llvm-objdump/MCFunction.cpp
>    llvm/trunk/tools/llvm-objdump/MCFunction.h
>    llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp
> 
> Modified: llvm/trunk/tools/llvm-objdump/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/CMakeLists.txt?rev=140028&r1=140027&r2=140028&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/CMakeLists.txt (original)
> +++ llvm/trunk/tools/llvm-objdump/CMakeLists.txt Mon Sep 19 12:56:04 2011
> @@ -8,5 +8,6 @@
> 
> add_llvm_tool(llvm-objdump
>   llvm-objdump.cpp
> +  MachODump.cpp
>   MCFunction.cpp
>   )
> 
> Modified: llvm/trunk/tools/llvm-objdump/MCFunction.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.cpp?rev=140028&r1=140027&r2=140028&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/MCFunction.cpp (original)
> +++ llvm/trunk/tools/llvm-objdump/MCFunction.cpp Mon Sep 19 12:56:04 2011
> @@ -30,48 +30,77 @@
> MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
>                                  const MemoryObject &Region, uint64_t Start,
>                                  uint64_t End, const MCInstrAnalysis *Ana,
> -                                 raw_ostream &DebugOut) {
> +                                 raw_ostream &DebugOut,
> +                                 SmallVectorImpl<uint64_t> &Calls) {
> +  std::vector<MCDecodedInst> Instructions;
>   std::set<uint64_t> Splits;
>   Splits.insert(Start);
> -  std::vector<MCDecodedInst> Instructions;
>   uint64_t Size;
> 
> -  // Disassemble code and gather basic block split points.
> -  for (uint64_t Index = Start; Index < End; Index += Size) {
> -    MCInst Inst;
> +  MCFunction f(Name);
> 
> -    if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())) {
> -      if (Ana->isBranch(Inst)) {
> -        uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
> -        // FIXME: Distinguish relocations from nop jumps.
> -        if (targ != -1ULL && (targ == Index+Size || targ >= End)) {
> +  {
> +  DenseSet<uint64_t> VisitedInsts;
> +  SmallVector<uint64_t, 16> WorkList;
> +  WorkList.push_back(Start);
> +  // Disassemble code and gather basic block split points.
> +  while (!WorkList.empty()) {
> +    uint64_t Index = WorkList.pop_back_val();
> +    if (VisitedInsts.find(Index) != VisitedInsts.end())
> +      continue;
> +
> +    for (;Index < End; Index += Size) {
> +      MCInst Inst;
> +
> +      if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
> +        if (Ana->isBranch(Inst)) {
> +          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
> +          if (targ != -1ULL && targ == Index+Size) {
> +            Instructions.push_back(MCDecodedInst(Index, Size, Inst));
> +            VisitedInsts.insert(Index);
> +            continue;
> +          }
> +          if (targ != -1ULL) {
> +            Splits.insert(targ);
> +            WorkList.push_back(targ);
> +            WorkList.push_back(Index+Size);
> +          }
> +          Splits.insert(Index+Size);
> +          Instructions.push_back(MCDecodedInst(Index, Size, Inst));
> +          VisitedInsts.insert(Index);
> +          break;
> +        } else if (Ana->isReturn(Inst)) {
> +          Splits.insert(Index+Size);
>           Instructions.push_back(MCDecodedInst(Index, Size, Inst));
> -          continue; // Skip branches that leave the function.
> +          VisitedInsts.insert(Index);
> +          break;
> +        } else if (Ana->isCall(Inst)) {
> +          uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
> +          if (targ != -1ULL && targ != Index+Size) {
> +            Calls.push_back(targ);
> +          }
>         }
> -        if (targ != -1ULL)
> -          Splits.insert(targ);
> -        Splits.insert(Index+Size);
> -      } else if (Ana->isReturn(Inst)) {
> -        Splits.insert(Index+Size);
> -      }
> 
> -      Instructions.push_back(MCDecodedInst(Index, Size, Inst));
> -    } else {
> -      errs() << "warning: invalid instruction encoding\n";
> -      if (Size == 0)
> -        Size = 1; // skip illegible bytes
> +        Instructions.push_back(MCDecodedInst(Index, Size, Inst));
> +        VisitedInsts.insert(Index);
> +      } else {
> +        VisitedInsts.insert(Index);
> +        errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
> +        if (Size == 0)
> +          Size = 1; // skip illegible bytes
> +      }
>     }
> -
> +  }
>   }
> 
> -  MCFunction f(Name);
> +  std::sort(Instructions.begin(), Instructions.end());
> 
> -  // Create basic blocks.
> +   // Create basic blocks.
>   unsigned ii = 0, ie = Instructions.size();
>   for (std::set<uint64_t>::iterator spi = Splits.begin(),
> -       spe = Splits.end(); spi != spe; ++spi) {
> +       spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
>     MCBasicBlock BB;
> -    uint64_t BlockEnd = llvm::next(spi) == spe ? End : *llvm::next(spi);
> +    uint64_t BlockEnd = *llvm::next(spi);
>     // Add instructions to the BB.
>     for (; ii != ie; ++ii) {
>       if (Instructions[ii].Address < *spi ||
> @@ -82,6 +111,8 @@
>     f.addBlock(*spi, BB);
>   }
> 
> +  std::sort(f.Blocks.begin(), f.Blocks.end());
> +
>   // Calculate successors of each block.
>   for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
>     MCBasicBlock &BB = i->second;
> @@ -94,16 +125,16 @@
>         // Indirect branch. Bail and add all blocks of the function as a
>         // successor.
>         for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
> -          BB.addSucc(&i->second);
> +          BB.addSucc(i->first);
>       } else if (targ != Inst.Address+Inst.Size)
> -        BB.addSucc(&f.getBlockAtAddress(targ));
> +        BB.addSucc(targ);
>       // Conditional branches can also fall through to the next block.
>       if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
> -        BB.addSucc(&llvm::next(i)->second);
> +        BB.addSucc(llvm::next(i)->first);
>     } else {
>       // No branch. Fall through to the next block.
>       if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
> -        BB.addSucc(&llvm::next(i)->second);
> +        BB.addSucc(llvm::next(i)->first);
>     }
>   }
> 
> 
> Modified: llvm/trunk/tools/llvm-objdump/MCFunction.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MCFunction.h?rev=140028&r1=140027&r2=140028&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/MCFunction.h (original)
> +++ llvm/trunk/tools/llvm-objdump/MCFunction.h Mon Sep 19 12:56:04 2011
> @@ -12,8 +12,11 @@
> //
> //===----------------------------------------------------------------------===//
> 
> +#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H
> +#define LLVM_OBJECTDUMP_MCFUNCTION_H
> +
> #include "llvm/ADT/ArrayRef.h"
> -#include "llvm/ADT/SmallPtrSet.h"
> +#include "llvm/ADT/DenseSet.h"
> #include "llvm/MC/MCInst.h"
> #include <map>
> 
> @@ -31,15 +34,20 @@
>   uint64_t Size;
>   MCInst Inst;
> 
> +  MCDecodedInst() {}
>   MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst)
>     : Address(Address), Size(Size), Inst(Inst) {}
> +
> +  bool operator<(const MCDecodedInst &RHS) const {
> +    return Address < RHS.Address;
> +  }
> };
> 
> /// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing
> /// MCBasicBlocks.
> class MCBasicBlock {
> -  SmallVector<MCDecodedInst, 8> Insts;
> -  typedef SmallPtrSet<MCBasicBlock*, 8> SetTy;
> +  std::vector<MCDecodedInst> Insts;
> +  typedef DenseSet<uint64_t> SetTy;
>   SetTy Succs;
> public:
>   ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
> @@ -48,10 +56,14 @@
>   succ_iterator succ_begin() const { return Succs.begin(); }
>   succ_iterator succ_end() const { return Succs.end(); }
> 
> -  bool contains(MCBasicBlock *BB) const { return Succs.count(BB); }
> +  bool contains(uint64_t Addr) const { return Succs.count(Addr); }
> 
>   void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); }
> -  void addSucc(MCBasicBlock *BB) { Succs.insert(BB); }
> +  void addSucc(uint64_t Addr) { Succs.insert(Addr); }
> +
> +  bool operator<(const MCBasicBlock &RHS) const {
> +    return Insts.size() < RHS.Insts.size();
> +  }
> };
> 
> /// MCFunction - Represents a named function in machine code, containing
> @@ -59,7 +71,7 @@
> class MCFunction {
>   const StringRef Name;
>   // Keep BBs sorted by address.
> -  typedef std::map<uint64_t, MCBasicBlock> MapTy;
> +  typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy;
>   MapTy Blocks;
> public:
>   MCFunction(StringRef Name) : Name(Name) {}
> @@ -68,7 +80,8 @@
>   static MCFunction
>   createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
>                        const MemoryObject &Region, uint64_t Start, uint64_t End,
> -                       const MCInstrAnalysis *Ana, raw_ostream &DebugOut);
> +                       const MCInstrAnalysis *Ana, raw_ostream &DebugOut,
> +                       SmallVectorImpl<uint64_t> &Calls);
> 
>   typedef MapTy::iterator iterator;
>   iterator begin() { return Blocks.begin(); }
> @@ -77,14 +90,11 @@
>   StringRef getName() const { return Name; }
> 
>   MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
> -    assert(!Blocks.count(Address) && "Already a BB at address.");
> -    return Blocks[Address] = BB;
> -  }
> -
> -  MCBasicBlock &getBlockAtAddress(uint64_t Address) {
> -    assert(Blocks.count(Address) && "No BB at address.");
> -    return Blocks[Address];
> +    Blocks.push_back(std::make_pair(Address, BB));
> +    return Blocks.back().second;
>   }
> };
> 
> }
> +
> +#endif
> 
> Added: llvm/trunk/tools/llvm-objdump/MachODump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MachODump.cpp?rev=140028&view=auto
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/MachODump.cpp (added)
> +++ llvm/trunk/tools/llvm-objdump/MachODump.cpp Mon Sep 19 12:56:04 2011
> @@ -0,0 +1,489 @@
> +//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file implements the MachO-specific dumper for llvm-objdump.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm-objdump.h"
> +#include "MCFunction.h"
> +#include "llvm/Support/MachO.h"
> +#include "llvm/Object/MachOObject.h"
> +#include "llvm/ADT/OwningPtr.h"
> +#include "llvm/ADT/Triple.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include "llvm/MC/MCAsmInfo.h"
> +#include "llvm/MC/MCDisassembler.h"
> +#include "llvm/MC/MCInst.h"
> +#include "llvm/MC/MCInstPrinter.h"
> +#include "llvm/MC/MCInstrAnalysis.h"
> +#include "llvm/MC/MCInstrDesc.h"
> +#include "llvm/MC/MCInstrInfo.h"
> +#include "llvm/MC/MCSubtargetInfo.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/Format.h"
> +#include "llvm/Support/GraphWriter.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +#include "llvm/Support/TargetRegistry.h"
> +#include "llvm/Support/TargetSelect.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Support/system_error.h"
> +#include <algorithm>
> +#include <cstring>
> +using namespace llvm;
> +using namespace object;
> +
> +static cl::opt<bool>
> +  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
> +                      "write it to a graphviz file (MachO-only)"));
> +
> +static const Target *GetTarget(const MachOObject *MachOObj) {
> +  // Figure out the target triple.
> +  llvm::Triple TT("unknown-unknown-unknown");
> +  switch (MachOObj->getHeader().CPUType) {
> +  case llvm::MachO::CPUTypeI386:
> +    TT.setArch(Triple::ArchType(Triple::x86));
> +    break;
> +  case llvm::MachO::CPUTypeX86_64:
> +    TT.setArch(Triple::ArchType(Triple::x86_64));
> +    break;
> +  case llvm::MachO::CPUTypeARM:
> +    TT.setArch(Triple::ArchType(Triple::arm));
> +    break;
> +  case llvm::MachO::CPUTypePowerPC:
> +    TT.setArch(Triple::ArchType(Triple::ppc));
> +    break;
> +  case llvm::MachO::CPUTypePowerPC64:
> +    TT.setArch(Triple::ArchType(Triple::ppc64));
> +    break;
> +  }
> +
> +  TripleName = TT.str();
> +
> +  // Get the target specific parser.
> +  std::string Error;
> +  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
> +  if (TheTarget)
> +    return TheTarget;
> +
> +  errs() << "llvm-objdump: error: unable to get target for '" << TripleName
> +         << "', see --version and --triple.\n";
> +  return 0;
> +}
> +
> +struct Section {
> +  char Name[16];
> +  uint64_t Address;
> +  uint64_t Size;
> +  uint32_t Offset;
> +  uint32_t NumRelocs;
> +  uint64_t RelocTableOffset;
> +};
> +
> +struct Symbol {
> +  uint64_t Value;
> +  uint32_t StringIndex;
> +  uint8_t SectionIndex;
> +  bool operator<(const Symbol &RHS) const { return Value < RHS.Value; }
> +};
> +
> +static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
> +                        MachOObject *MachOObj, raw_ostream &OS) {
> +  for (unsigned i = 0; i != Sections.size(); ++i) {
> +    uint64_t addr = Address-Sections[i].Address;
> +    if (Sections[i].Address <= Address &&
> +        Sections[i].Address + Sections[i].Size > Address) {
> +      StringRef bytes = MachOObj->getData(Sections[i].Offset,
> +                                          Sections[i].Size);
> +      if (!strcmp(Sections[i].Name, "__cstring"))
> +        OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
> +      if (!strcmp(Sections[i].Name, "__cfstring"))
> +        OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
> +    }
> +  }
> +}
> +
> +void llvm::DisassembleInputMachO(StringRef Filename) {
> +  OwningPtr<MemoryBuffer> Buff;
> +
> +  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
> +    errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n";
> +    return;
> +  }
> +
> +  OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take()));
> +
> +  const Target *TheTarget = GetTarget(MachOObj.get());
> +  if (!TheTarget) {
> +    // GetTarget prints out stuff.
> +    return;
> +  }
> +  const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
> +  OwningPtr<MCInstrAnalysis>
> +    InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo));
> +
> +  // Set up disassembler.
> +  OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
> +
> +  if (!AsmInfo) {
> +    errs() << "error: no assembly info for target " << TripleName << "\n";
> +    return;
> +  }
> +
> +  OwningPtr<const MCSubtargetInfo>
> +    STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
> +
> +  if (!STI) {
> +    errs() << "error: no subtarget info for target " << TripleName << "\n";
> +    return;
> +  }
> +
> +  OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
> +  if (!DisAsm) {
> +    errs() << "error: no disassembler for target " << TripleName << "\n";
> +    return;
> +  }
> +
> +  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
> +  OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
> +        AsmPrinterVariant, *AsmInfo, *STI));
> +  if (!IP) {
> +    errs() << "error: no instruction printer for target " << TripleName << '\n';
> +    return;
> +  }
> +
> +  outs() << '\n';
> +  outs() << Filename << ":\n\n";
> +
> +  const macho::Header &Header = MachOObj->getHeader();
> +
> +  const MachOObject::LoadCommandInfo *SymtabLCI = 0;
> +  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
> +    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
> +    switch (LCI.Command.Type) {
> +    case macho::LCT_Symtab:
> +      SymtabLCI = &LCI;
> +      break;
> +    }
> +  }
> +
> +  // Read and register the symbol table data.
> +  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
> +  MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
> +  MachOObj->RegisterStringTable(*SymtabLC);
> +
> +  std::vector<Section> Sections;
> +  std::vector<Symbol> Symbols;
> +  std::vector<Symbol> UnsortedSymbols; // FIXME: duplication
> +  SmallVector<uint64_t, 8> FoundFns;
> +
> +  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
> +    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
> +    if (LCI.Command.Type == macho::LCT_Segment) {
> +      InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
> +      MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC);
> +
> +      for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
> +        InMemoryStruct<macho::Section> Sect;
> +        MachOObj->ReadSection(LCI, SectNum, Sect);
> +
> +        Section S;
> +        memcpy(S.Name, Sect->Name, 16);
> +        S.Address = Sect->Address;
> +        S.Size = Sect->Size;
> +        S.Offset = Sect->Offset;
> +        S.NumRelocs = Sect->NumRelocationTableEntries;
> +        S.RelocTableOffset = Sect->RelocationTableOffset;
> +        Sections.push_back(S);
> +
> +        for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
> +          InMemoryStruct<macho::SymbolTableEntry> STE;
> +          MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
> +
> +          Symbol S;
> +          S.StringIndex = STE->StringIndex;
> +          S.SectionIndex = STE->SectionIndex;
> +          S.Value = STE->Value;
> +          Symbols.push_back(S);
> +          UnsortedSymbols.push_back(Symbols.back());
> +        }
> +      }
> +    } else if (LCI.Command.Type == macho::LCT_Segment64) {
> +      InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
> +      MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC);
> +
> +      for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
> +        InMemoryStruct<macho::Section64> Sect64;
> +        MachOObj->ReadSection64(LCI, SectNum, Sect64);
> +
> +        Section S;
> +        memcpy(S.Name, Sect64->Name, 16);
> +        S.Address = Sect64->Address;
> +        S.Size = Sect64->Size;
> +        S.Offset = Sect64->Offset;
> +        S.NumRelocs = Sect64->NumRelocationTableEntries;
> +        S.RelocTableOffset = Sect64->RelocationTableOffset;
> +        Sections.push_back(S);
> +
> +        for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
> +          InMemoryStruct<macho::Symbol64TableEntry> STE;
> +          MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
> +
> +          Symbol S;
> +          S.StringIndex = STE->StringIndex;
> +          S.SectionIndex = STE->SectionIndex;
> +          S.Value = STE->Value;
> +          Symbols.push_back(S);
> +          UnsortedSymbols.push_back(Symbols.back());
> +        }
> +      }
> +    } else if (LCI.Command.Type == macho::LCT_FunctionStarts) {
> +      InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
> +      MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC);
> +
> +      MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns);
> +    }
> +  }
> +
> +  std::map<uint64_t, MCFunction*> FunctionMap;
> +
> +  // Sort the symbols by address, just in case they didn't come in that way.
> +  array_pod_sort(Symbols.begin(), Symbols.end());
> +
> +#ifndef NDEBUG
> +  raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
> +#else
> +  raw_ostream &DebugOut = nulls();
> +#endif
> +
> +  SmallVector<MCFunction, 16> Functions;
> +
> +  for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
> +    if (strcmp(Sections[SectIdx].Name, "__text"))
> +      continue;
> +
> +    uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset;
> +    for (unsigned i = 0, e = FoundFns.size(); i != e; ++i)
> +      FunctionMap.insert(std::pair<uint64_t,MCFunction*>(FoundFns[i]+VMAddr,0));
> +
> +    StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset,
> +                                        Sections[SectIdx].Size);
> +    StringRefMemoryObject memoryObject(Bytes);
> +    bool symbolTableWorked = false;
> +
> +    std::vector<std::pair<uint64_t, uint32_t> > Relocs;
> +    for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) {
> +      InMemoryStruct<macho::RelocationEntry> RE;
> +      MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE);
> +      Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff));
> +    }
> +    array_pod_sort(Relocs.begin(), Relocs.end());
> +
> +    for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
> +      if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx)
> +        continue;
> +
> +      uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address;
> +      uint64_t End = (SymIdx+1 == Symbols.size() ||
> +          Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ?
> +          Sections[SectIdx].Size :
> +          Symbols[SymIdx+1].Value - Sections[SectIdx].Address;
> +      uint64_t Size;
> +
> +      if (Start >= End)
> +        continue;
> +
> +      symbolTableWorked = true;
> +
> +      if (!CFG) {
> +        outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex)
> +          << ":\n";
> +        for (uint64_t Index = Start; Index < End; Index += Size) {
> +          MCInst Inst;
> +
> +          if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
> +                                     DebugOut, nulls())) {
> +            outs() << format("%8llx:\t", Sections[SectIdx].Address + Index);
> +            DumpBytes(StringRef(Bytes.data() + Index, Size));
> +            IP->printInst(&Inst, outs(), "");
> +            outs() << "\n";
> +          } else {
> +            errs() << "llvm-objdump: warning: invalid instruction encoding\n";
> +            if (Size == 0)
> +              Size = 1; // skip illegible bytes
> +          }
> +        }
> +      } else {
> +        // Create CFG and use it for disassembly.
> +        SmallVector<uint64_t, 16> Calls;
> +        MCFunction f =
> +          MCFunction::createFunctionFromMC(
> +              MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
> +              DisAsm.get(),
> +              memoryObject, Start, End,
> +              InstrAnalysis.get(), DebugOut,
> +              Calls);
> +
> +        Functions.push_back(f);
> +        FunctionMap[Start] = &Functions.back();
> +
> +        for (unsigned i = 0, e = Calls.size(); i != e; ++i)
> +          FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i], 0));
> +      }
> +    }
> +
> +    if (CFG) {
> +      if (!symbolTableWorked) {
> +        // Create CFG and use it for disassembly.
> +        SmallVector<uint64_t, 16> Calls;
> +        MCFunction f =
> +          MCFunction::createFunctionFromMC("__TEXT", DisAsm.get(),
> +              memoryObject, 0, Sections[SectIdx].Size,
> +              InstrAnalysis.get(), DebugOut,
> +              Calls);
> +
> +        Functions.push_back(f);
> +        FunctionMap[Sections[SectIdx].Offset] = &Functions.back();
> +
> +        for (unsigned i = 0, e = Calls.size(); i != e; ++i)
> +          FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i], 0));
> +      }
> +      for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
> +           me = FunctionMap.end(); mi != me; ++mi)
> +        if (mi->second == 0) {
> +          SmallVector<uint64_t, 16> Calls;
> +          MCFunction f =
> +            MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
> +                                             memoryObject, mi->first,
> +                                             Sections[SectIdx].Size,
> +                                             InstrAnalysis.get(), DebugOut,
> +                                             Calls);
> +          Functions.push_back(f);
> +          mi->second = &Functions.back();
> +          for (unsigned i = 0, e = Calls.size(); i != e; ++i)
> +            if (FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i],0))
> +                                                                        .second)
> +              mi = FunctionMap.begin();
> +        }
> +
> +      DenseSet<uint64_t> PrintedBlocks;
> +      for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) {
> +        MCFunction &f = Functions[ffi];
> +        for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
> +          if (!PrintedBlocks.insert(fi->first).second)
> +            continue;
> +          bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
> +
> +          // Only print blocks that have predecessors.
> +          // FIXME: Slow.
> +          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
> +              ++pi)
> +            if (pi->second.contains(fi->first)) {
> +              hasPreds = true;
> +              break;
> +            }
> +
> +          // Data block.
> +          if (!hasPreds && fi != f.begin()) {
> +            uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size :
> +                                                  llvm::next(fi)->first;
> +            outs() << "# " << End-fi->first << " bytes of data:\n";
> +            for (unsigned pos = fi->first; pos != End; ++pos) {
> +              outs() << format("%8x:\t", Sections[SectIdx].Address + pos);
> +              DumpBytes(StringRef(Bytes.data() + pos, 1));
> +              outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
> +            }
> +            continue;
> +          }
> +
> +          if (fi->second.contains(fi->first))
> +            outs() << "# Loop begin:\n";
> +
> +          for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
> +               ++ii) {
> +            const MCDecodedInst &Inst = fi->second.getInsts()[ii];
> +            if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) !=
> +                FunctionMap.end())
> +              outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]->
> +                                                             getName() << ":\n";
> +            outs() << format("%8llx:\t", Sections[SectIdx].Address +
> +                                         Inst.Address);
> +            DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
> +            // Simple loops.
> +            if (fi->second.contains(fi->first))
> +              outs() << '\t';
> +            IP->printInst(&Inst.Inst, outs(), "");
> +            for (unsigned j = 0; j != Relocs.size(); ++j)
> +              if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address &&
> +                  Relocs[j].first < Sections[SectIdx].Address + Inst.Address +
> +                                    Inst.Size) {
> +                outs() << "\t# "
> +                   << MachOObj->getStringAtIndex(
> +                                  UnsortedSymbols[Relocs[j].second].StringIndex)
> +                   << ' ';
> +                DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections,
> +                            MachOObj.get(), outs());
> +              }
> +            uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
> +                                                          Inst.Address,
> +                                                          Inst.Size);
> +            if (targ != -1ULL)
> +              DumpAddress(targ, Sections, MachOObj.get(), outs());
> +
> +            outs() << '\n';
> +          }
> +        }
> +
> +        // Start a new dot file.
> +        std::string Error;
> +        raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
> +        if (!Error.empty()) {
> +          errs() << "llvm-objdump: warning: " << Error << '\n';
> +          continue;
> +        }
> +
> +        Out << "digraph " << f.getName() << " {\n";
> +        Out << "graph [ rankdir = \"LR\" ];\n";
> +        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
> +          bool hasPreds = false;
> +          // Only print blocks that have predecessors.
> +          // FIXME: Slow.
> +          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
> +               ++pi)
> +            if (pi->second.contains(i->first)) {
> +              hasPreds = true;
> +              break;
> +            }
> +
> +          if (!hasPreds && i != f.begin())
> +            continue;
> +
> +          Out << '"' << i->first << "\" [ label=\"<a>";
> +          // Print instructions.
> +          for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
> +               ++ii) {
> +            // Escape special chars and print the instruction in mnemonic form.
> +            std::string Str;
> +            raw_string_ostream OS(Str);
> +            IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
> +            Out << DOT::EscapeString(OS.str()) << '|';
> +          }
> +          Out << "<o>\" shape=\"record\" ];\n";
> +
> +          // Add edges.
> +          for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
> +              se = i->second.succ_end(); si != se; ++si)
> +            Out << i->first << ":o -> " << *si <<":a\n";
> +        }
> +        Out << "}\n";
> +      }
> +    }
> +  }
> +}
> 
> Modified: llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp?rev=140028&r1=140027&r2=140028&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp (original)
> +++ llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp Mon Sep 19 12:56:04 2011
> @@ -13,6 +13,7 @@
> //
> //===----------------------------------------------------------------------===//
> 
> +#include "llvm-objdump.h"
> #include "MCFunction.h"
> #include "llvm/Object/ObjectFile.h"
> #include "llvm/ADT/OwningPtr.h"
> @@ -46,39 +47,37 @@
> using namespace llvm;
> using namespace object;
> 
> -namespace {
> -  cl::list<std::string>
> -  InputFilenames(cl::Positional, cl::desc("<input object files>"),
> -                 cl::ZeroOrMore);
> -
> -  cl::opt<bool>
> -  Disassemble("disassemble",
> -    cl::desc("Display assembler mnemonics for the machine instructions"));
> -  cl::alias
> -  Disassembled("d", cl::desc("Alias for --disassemble"),
> -               cl::aliasopt(Disassemble));
> -
> -  cl::opt<bool>
> -  CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and"
> -                      "write it to a graphviz file"));
> +static cl::list<std::string>
> +InputFilenames(cl::Positional, cl::desc("<input object files>"),cl::ZeroOrMore);
> 
> -  cl::opt<std::string>
> -  TripleName("triple", cl::desc("Target triple to disassemble for, "
> +static cl::opt<bool>
> +Disassemble("disassemble",
> +  cl::desc("Display assembler mnemonics for the machine instructions"));
> +static cl::alias
> +Disassembled("d", cl::desc("Alias for --disassemble"),
> +             cl::aliasopt(Disassemble));
> +
> +static cl::opt<bool>
> +MachO("macho", cl::desc("Use MachO specific object file parser"));
> +static cl::alias
> +MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachO));
> +
> +cl::opt<std::string>
> +llvm::TripleName("triple", cl::desc("Target triple to disassemble for, "
> +                                    "see -version for available targets"));
> +
> +cl::opt<std::string>
> +llvm::ArchName("arch", cl::desc("Target arch to disassemble for, "
>                                 "see -version for available targets"));
> 
> -  cl::opt<std::string>
> -  ArchName("arch", cl::desc("Target arch to disassemble for, "
> -                            "see -version for available targets"));
> -
> -  StringRef ToolName;
> -
> -  bool error(error_code ec) {
> -    if (!ec) return false;
> -
> -    outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
> -    outs().flush();
> -    return true;
> -  }
> +static StringRef ToolName;
> +
> +static bool error(error_code ec) {
> +  if (!ec) return false;
> +
> +  outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
> +  outs().flush();
> +  return true;
> }
> 
> static const Target *GetTarget(const ObjectFile *Obj = NULL) {
> @@ -106,27 +105,8 @@
>   return 0;
> }
> 
> -namespace {
> -class StringRefMemoryObject : public MemoryObject {
> -private:
> -  StringRef Bytes;
> -public:
> -  StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
> -
> -  uint64_t getBase() const { return 0; }
> -  uint64_t getExtent() const { return Bytes.size(); }
> -
> -  int readByte(uint64_t Addr, uint8_t *Byte) const {
> -    if (Addr >= getExtent())
> -      return -1;
> -    *Byte = Bytes[Addr];
> -    return 0;
> -  }
> -};
> -}
> -
> -static void DumpBytes(StringRef bytes) {
> -  static char hex_rep[] = "0123456789abcdef";
> +void llvm::DumpBytes(StringRef bytes) {
> +  static const char hex_rep[] = "0123456789abcdef";
>   // FIXME: The real way to do this is to figure out the longest instruction
>   //        and align to that size before printing. I'll fix this when I get
>   //        around to outputting relocations.
> @@ -151,7 +131,7 @@
>   outs() << output;
> }
> 
> -static void DisassembleInput(const StringRef &Filename) {
> +void llvm::DisassembleInputLibObject(StringRef Filename) {
>   OwningPtr<MemoryBuffer> Buff;
> 
>   if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
> @@ -259,118 +239,22 @@
>         raw_ostream &DebugOut = nulls();
> #endif
> 
> -      if (!CFG) {
> -        for (Index = Start; Index < End; Index += Size) {
> -          MCInst Inst;
> -
> -          if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
> -                                     DebugOut, nulls())) {
> -            uint64_t addr;
> -            if (error(i->getAddress(addr))) break;
> -            outs() << format("%8x:\t", addr + Index);
> -            DumpBytes(StringRef(Bytes.data() + Index, Size));
> -            IP->printInst(&Inst, outs(), "");
> -            outs() << "\n";
> -          } else {
> -            errs() << ToolName << ": warning: invalid instruction encoding\n";
> -            if (Size == 0)
> -              Size = 1; // skip illegible bytes
> -          }
> -        }
> -
> -      } else {
> -        // Create CFG and use it for disassembly.
> -        MCFunction f =
> -          MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(),
> -                                           memoryObject, Start, End,
> -                                           InstrAnalysis.get(), DebugOut);
> -
> -        for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
> -          bool hasPreds = false;
> -          // Only print blocks that have predecessors.
> -          // FIXME: Slow.
> -          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
> -              ++pi)
> -            if (pi->second.contains(&fi->second)) {
> -              hasPreds = true;
> -              break;
> -            }
> -
> -          // Data block.
> -          if (!hasPreds && fi != f.begin()) {
> -            uint64_t End = llvm::next(fi) == fe ? SectSize :
> -                                                  llvm::next(fi)->first;
> -            uint64_t addr;
> -            if (error(i->getAddress(addr))) break;
> -            outs() << "# " << End-fi->first << " bytes of data:\n";
> -            for (unsigned pos = fi->first; pos != End; ++pos) {
> -              outs() << format("%8x:\t", addr + pos);
> -              DumpBytes(StringRef(Bytes.data() + pos, 1));
> -              outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
> -            }
> -            continue;
> -          }
> -
> -          if (fi->second.contains(&fi->second))
> -            outs() << "# Loop begin:\n";
> -
> -          for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
> -               ++ii) {
> -            uint64_t addr;
> -            if (error(i->getAddress(addr))) break;
> -            const MCDecodedInst &Inst = fi->second.getInsts()[ii];
> -            outs() << format("%8x:\t", addr + Inst.Address);
> -            DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
> -            // Simple loops.
> -            if (fi->second.contains(&fi->second))
> -              outs() << '\t';
> -            IP->printInst(&Inst.Inst, outs(), "");
> -            outs() << '\n';
> -          }
> -        }
> -
> -        // Start a new dot file.
> -        std::string Error;
> -        raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
> -        if (!Error.empty()) {
> -          errs() << ToolName << ": warning: " << Error << '\n';
> -          continue;
> -        }
> +      for (Index = Start; Index < End; Index += Size) {
> +        MCInst Inst;
> 
> -        Out << "digraph " << f.getName() << " {\n";
> -        Out << "graph [ rankdir = \"LR\" ];\n";
> -        for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
> -          bool hasPreds = false;
> -          // Only print blocks that have predecessors.
> -          // FIXME: Slow.
> -          for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
> -               ++pi)
> -            if (pi->second.contains(&i->second)) {
> -              hasPreds = true;
> -              break;
> -            }
> -
> -          if (!hasPreds && i != f.begin())
> -            continue;
> -
> -          Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>";
> -          // Print instructions.
> -          for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
> -               ++ii) {
> -            // Escape special chars and print the instruction in mnemonic form.
> -            std::string Str;
> -            raw_string_ostream OS(Str);
> -            IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
> -            Out << DOT::EscapeString(OS.str()) << '|';
> -          }
> -          Out << "<o>\" shape=\"record\" ];\n";
> -
> -          // Add edges.
> -          for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
> -              se = i->second.succ_end(); si != se; ++si)
> -            Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n";
> +        if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
> +                                   DebugOut, nulls())) {
> +          uint64_t addr;
> +          if (error(i->getAddress(addr))) break;
> +          outs() << format("%8x:\t", addr + Index);
> +          DumpBytes(StringRef(Bytes.data() + Index, Size));
> +          IP->printInst(&Inst, outs(), "");
> +          outs() << "\n";
> +        } else {
> +          errs() << ToolName << ": warning: invalid instruction encoding\n";
> +          if (Size == 0)
> +            Size = 1; // skip illegible bytes
>         }
> -        Out << "}\n";
>       }
>     }
>   }
> @@ -404,8 +288,12 @@
>     return 2;
>   }
> 
> -  std::for_each(InputFilenames.begin(), InputFilenames.end(),
> -                DisassembleInput);
> +  if (MachO)
> +    std::for_each(InputFilenames.begin(), InputFilenames.end(),
> +                  DisassembleInputMachO);
> +  else
> +    std::for_each(InputFilenames.begin(), InputFilenames.end(),
> +                  DisassembleInputLibObject);
> 
>   return 0;
> }
> 
> Added: llvm/trunk/tools/llvm-objdump/llvm-objdump.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.h?rev=140028&view=auto
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/llvm-objdump.h (added)
> +++ llvm/trunk/tools/llvm-objdump/llvm-objdump.h Mon Sep 19 12:56:04 2011
> @@ -0,0 +1,47 @@
> +//===-- llvm-objdump.h ----------------------------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_OBJDUMP_H
> +#define LLVM_OBJDUMP_H
> +
> +#include "llvm/ADT/StringRef.h"
> +#include "llvm/Support/CommandLine.h"
> +#include "llvm/Support/DataTypes.h"
> +#include "llvm/Support/MemoryObject.h"
> +
> +namespace llvm {
> +
> +extern cl::opt<std::string> TripleName;
> +extern cl::opt<std::string> ArchName;
> +
> +// Various helper functions.
> +void DumpBytes(StringRef bytes);
> +void DisassembleInputLibObject(StringRef Filename);
> +void DisassembleInputMachO(StringRef Filename);
> +
> +class StringRefMemoryObject : public MemoryObject {
> +private:
> +  StringRef Bytes;
> +public:
> +  StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
> +
> +  uint64_t getBase() const { return 0; }
> +  uint64_t getExtent() const { return Bytes.size(); }
> +
> +  int readByte(uint64_t Addr, uint8_t *Byte) const {
> +    if (Addr >= getExtent())
> +      return -1;
> +    *Byte = Bytes[Addr];
> +    return 0;
> +  }
> +};
> +
> +}
> +
> +#endif
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list