[llvm-commits] [PATCH] Teach llvm-objdump to dump Win64 exception tables

Sean Silva silvas at purdue.edu
Thu Nov 22 18:37:11 PST 2012


+  for (unsigned i = 0; i < numCodes; i++) {
+    const UnwindCode &UC = UnwindCodes[i];

You should factor the body of this loop into a separate function.

+      if (UC.getOpInfo() == 0) {
+        size = UnwindCodes[++i].frameOffset;
+      } else {
+        size = UnwindCodes[i+1].frameOffset
+               + (((uint32_t) UnwindCodes[i+2].frameOffset) << 16);
+        i += 2;

This is terrifying. ++i, i+1, i+1, i+=2 all within a couple lines.
Please carefully comment this and break it up if possible (same with
the other cases). How do you know that this won't ever read off the
end? Also, instead of `size = ...`, just do `outs() << ...`

+  error_code ec;
+  if ((ec = sym.getAddress(ResolvedAddr))) return ec;

You have a number of places where you use this awkward construct.
Instead, you can declare directly inside the "if" like:

if (error_code ec = sym.....)

+  if (const COFFObjectFile *coff = dyn_cast<const COFFObjectFile>(o)) {

I don't think you need the const inside the dyn_cast's template parameter here.

-    uint8_t codeOffset;
-    uint8_t unwindOp:4,
-            opInfo:4;
+ support::ulittle8_t codeOffset;
+ support::ulittle8_t unwindOpAndOpInfo;
+
   } u;

Kill the hard tabs

-  void setLanguageSpecificHandlerOffset(uint64_t offset) {
+  void setLanguageSpecificHandlerOffset(uint32_t offset) {
     *reinterpret_cast<uint64_t *>(getLanguageSpecificData()) = offset;

casting to uint64_t* but assigning a uint32_t? Seems fishy...


Also, your naming for variables is really inconsistent. Sometimes it
is like_this and other times LikeThis.

-- Sean Silva

On Thu, Nov 22, 2012 at 4:03 PM, Kai <kai at redstar.de> wrote:
> Hi Michael!
>
> Thank you for the detailed review! I tried to fix all issues. Please have a
> look again. (I am not sure if I really fixed the endianess issue on struct
> RuntimeFunction.)
>
> Regards
> Kai
>
>
> On 13.11.2012 22:28, Michael Spencer wrote:
>>
>> On Mon, Nov 12, 2012 at 1:42 PM, Kai <kai at redstar.de> wrote:
>>>
>>> Ping!
>>>
>>> I updates the patch with the following:
>>> - removed trailing space
>>> - added missing CMake entry
>>>
>>> Regards
>>> Kai
>>
>>
>>> +++ b/tools/llvm-objdump/COFFDump.cpp
>>> @@ -0,0 +1,310 @@
>>> +//===-- llvm-objdump.cpp - Object file dumping utility for llvm
>>> -----------===//
>>
>>
>> Should be COFFDump.cpp
>>
>>> +//
>>> +//                     The LLVM Compiler Infrastructure
>>> +//
>>> +// This file is distributed under the University of Illinois Open Source
>>> +// License. See LICENSE.TXT for details.
>>> +//
>>>
>>> +//===----------------------------------------------------------------------===//
>>> +//
>>> +// This file implements the COFF-specific dumper for llvm-objdump.
>>> +//
>>>
>>> +//===----------------------------------------------------------------------===//
>>
>>
>> Use the new doxygen style for file level comments.
>> http://llvm.org/docs/CodingStandards.html#file-headers
>>
>> This should include a link to the documentation covering Win64 EH
>> encoding.
>>
>>> +
>>> +#include "llvm-objdump.h"
>>> +#include "llvm/Object/COFF.h"
>>> +#include "llvm/Object/ObjectFile.h"
>>> +#include "llvm/Support/Format.h"
>>> +#include "llvm/Support/SourceMgr.h"
>>> +#include "llvm/Support/raw_ostream.h"
>>> +#include "llvm/Support/system_error.h"
>>> +#include "llvm/Support/Win64EH.h"
>>> +#include <algorithm>
>>> +#include <cstring>
>>> +using namespace llvm;
>>> +using namespace object;
>>> +
>>> +namespace {
>>> +  using namespace llvm::Win64EH;
>>
>>
>> The content of namespaces this large should not be indented.
>>
>>> +
>>> +  llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out,
>>> +                                    unsigned long long N) {
>>> +    if (N >= 10)
>>> +      Out << "0x";
>>> +    Out.write_hex(N);
>>> +    return Out;
>>> +  }
>>
>>
>> This is unused.
>>
>>> +
>>> +  StringRef GetCOFFUnwindCodeTypeName(uint8_t Code) {
>>> +    switch(Code) {
>>> +    default: llvm_unreachable("Invalid unwind code");
>>> +    case UOP_PushNonVol: return "UOP_PushNonVol";
>>> +    case UOP_AllocLarge: return "UOP_AllocLarge";
>>> +    case UOP_AllocSmall: return "UOP_AllocSmall";
>>> +    case UOP_SetFPReg: return "UOP_SetFPReg";
>>> +    case UOP_SaveNonVol: return "UOP_SaveNonVol";
>>> +    case UOP_SaveNonVolBig: return "UOP_SaveNonVolBig";
>>> +    case UOP_SaveXMM128: return "UOP_SaveXMM128";
>>> +    case UOP_SaveXMM128Big: return "UOP_SaveXMM128Big";
>>> +    case UOP_PushMachFrame: return "UOP_PushMachFrame";
>>> +    }
>>> +  }
>>> +
>>> +  StringRef GetCOFFUnwindRegisterName(uint8_t Reg) {
>>> +    switch(Reg) {
>>> +    default: llvm_unreachable("Invalid register");
>>> +    case 0: return "RAX";
>>> +    case 1: return "RCX";
>>> +    case 2: return "RDX";
>>> +    case 3: return "RBX";
>>> +    case 4: return "RSP";
>>> +    case 5: return "RBP";
>>> +    case 6: return "RSI";
>>> +    case 7: return "RDI";
>>> +    case 8: return "R8";
>>> +    case 9: return "R9";
>>> +    case 10: return "R10";
>>> +    case 11: return "R11";
>>> +    case 12: return "R12";
>>> +    case 13: return "R13";
>>> +    case 14: return "R14";
>>> +    case 15: return "R15";
>>> +    }
>>> +  }
>>> +
>>> +  void PrintCOFFUnwindCode(const UnwindCode* UnwindCodes, unsigned
>>> numCodes) {
>>
>>
>> * and & should be on the right. This error occurs in lots of other places
>> too.
>>
>> Also, lowercase function names. Always.
>>
>>> +    for (unsigned i = 0; i < numCodes; i++) {
>>> +      const UnwindCode& UC = UnwindCodes[i];
>>
>>
>> Using UnwindCodes like this is incorrect. If you would like to do
>> this, use llvm/Support/Endian.h and see examples from
>> llvm/Object/COFF.h.
>>
>>> +      outs() <<  format("    0x%02x: ", unsigned(UC.u.codeOffset))
>>> +             << GetCOFFUnwindCodeTypeName(UC.u.unwindOp);
>>> +      switch (UC.u.unwindOp) {
>>> +      case UOP_PushNonVol:
>>> +        outs() << " " << GetCOFFUnwindRegisterName(UC.u.opInfo);
>>> +        break;
>>> +      case UOP_AllocLarge: {
>>> +        uint32_t size;
>>> +        if (UC.u.opInfo == 0) {
>>> +          size = UnwindCodes[++i].frameOffset;
>>> +        }
>>> +        else {
>>
>>
>> No newline before else.
>>
>>> +          size = UnwindCodes[i+1].frameOffset
>>> +                 + (((uint32_t) UnwindCodes[i+2].frameOffset) << 16);
>>> +          i += 2;
>>> +        }
>>> +        outs() << " " << size;
>>> +        break;
>>> +      }
>>> +      case UOP_AllocSmall:
>>> +        outs() << " " << ((UC.u.opInfo+1) * 8);
>>> +        break;
>>> +      case UOP_SetFPReg:
>>> +        outs() << " ";
>>> +        break;
>>> +      case UOP_SaveNonVol:
>>> +        outs() << " " << GetCOFFUnwindRegisterName(UC.u.opInfo)
>>> +               << format(" [0x%04x]", 8 * UnwindCodes[++i].frameOffset);
>>> +        break;
>>> +      case UOP_SaveNonVolBig: {
>>> +        uint32_t ofs = UnwindCodes[i+1].frameOffset
>>> +                        + (((uint32_t) UnwindCodes[i+2].frameOffset) <<
>>> 16);
>>> +        i += 2;
>>> +        outs() << " " << GetCOFFUnwindRegisterName(UC.u.opInfo)
>>> +               << format(" [0x%08x]", ofs);
>>> +        break;
>>> +      }
>>> +      case UOP_SaveXMM128:
>>> +        outs() << " XMM" << static_cast<uint32_t>(UC.u.opInfo)
>>> +               << format(" [0x%04x]", 16 *
>>> UnwindCodes[++i].frameOffset);
>>> +        break;
>>> +      case UOP_SaveXMM128Big: {
>>> +        uint32_t ofs = UnwindCodes[i+1].frameOffset
>>> +                       + (((uint32_t) UnwindCodes[i+2].frameOffset) <<
>>> 16);
>>> +        i += 2;
>>> +        outs() << " XMM" << UC.u.opInfo << format(" [0x%08x]", ofs);
>>> +        break;
>>> +      }
>>> +      case UOP_PushMachFrame:
>>> +        outs() << " " << (UC.u.opInfo ? "w/o" : "w") << " error code";
>>> +        break;
>>> +      }
>>> +      outs() << "\n";
>>> +    }
>>> +  }
>>> +
>>> +  error_code ResolveCOFFRelocation(const COFFObjectFile* o,
>>> +                                   const SymbolRef& sym,
>>> +                                   const coff_section*& ResolvedSection,
>>> +                                   uint64_t& ResolvedAddr) {
>>
>>
>> This needs documentation. It's not really resolving a relocation. It's
>> getting the section and address of a symbol.
>>
>>> +    error_code ec;
>>> +    if ((ec = sym.getAddress(ResolvedAddr))) return ec;
>>> +    section_iterator iter(o->begin_sections());
>>> +    if ((ec = sym.getSection(iter))) return ec;
>>> +    ResolvedSection = o->getCOFFSection(iter);
>>> +    return object_error::success;
>>> +  }
>>> +
>>> +  error_code GetCOFFUnwindInfo(const COFFObjectFile* o,
>>> +                               const std::vector<RelocationRef> Rels,
>>> +                               uint64_t offset, ArrayRef<uint8_t>&
>>> Contents,
>>> +                               uint64_t& addr) {
>>
>>
>> This needs documentation and probably a better name. It looks like it
>> is getting the unwind info section and offset for a given offset in
>> Rels.
>>
>>> +    for (std::vector<RelocationRef>::const_iterator R = Rels.begin();
>>> +                                                    R != Rels.end();
>>> R++) {
>>> +      error_code ec;
>>> +      uint64_t ofs;
>>> +      if ((ec = R->getOffset(ofs))) return ec;
>>> +      if (ofs == offset) {
>>> +        SymbolRef sym;
>>> +        if ((ec = R->getSymbol(sym))) return ec;
>>> +        const coff_section* section;
>>> +        ResolveCOFFRelocation(o, sym, section, addr);
>>> +        if ((ec = o->getSectionContents(section, Contents))) return ec;
>>> +        break;
>>> +      }
>>> +    }
>>> +    return object_error::success;
>>> +  }
>>> +
>>> +  error_code GetCOFFSymbol(const std::vector<RelocationRef> Rels,
>>> +                           uint64_t offset, StringRef& name) {
>>
>>
>> Documentation. It seems this is extracting the symbol name from the
>> relocation in Rels that has offset offset.
>>
>>> +    for (std::vector<RelocationRef>::const_iterator R = Rels.begin();
>>> +                                                    R != Rels.end();
>>> R++) {
>>> +      error_code ec;
>>> +      uint64_t ofs;
>>> +      if ((ec = R->getOffset(ofs))) return ec;
>>> +      if (ofs == offset) {
>>> +        SymbolRef sym;
>>> +        if ((ec = R->getSymbol(sym))) return ec;
>>> +        if ((ec = sym.getName(name))) return ec;
>>> +        break;
>>> +      }
>>> +    }
>>> +    return object_error::success;
>>> +  }
>>> +
>>> +  void PrintCOFFSymbolAddress(llvm::raw_ostream &Out,
>>> +                              const std::vector<RelocationRef> Rels,
>>> +                              uint64_t offset, uint32_t disp) {
>>> +      StringRef sym;
>>> +      GetCOFFSymbol(Rels, offset, sym);
>>> +      Out << sym;
>>> +      if (disp > 0)
>>> +        Out << format(" + 0x%04x", disp);
>>> +  }
>>> +}
>>> +
>>> +void llvm::PrintCOFFUnwindInfo(const COFFObjectFile* o) {
>>> +  const coff_file_header *header;
>>> +  if (error(o->getHeader(header))) return;
>>> +
>>> +  if (header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) {
>>> +    errs() << "Unsupported image machine type "
>>> +              "(currently only AMD64 is supported).\n";
>>> +    return;
>>> +  }
>>> +
>>> +  const coff_section* pdata = 0;
>>
>>
>> * on right and uppercase.
>>
>>> +
>>> +  error_code ec;
>>> +  for (section_iterator si = o->begin_sections(),
>>> +                            se = o->end_sections();
>>> +                            si != se; si.increment(ec)) {
>>> +    if (error(ec)) return;
>>> +
>>> +    StringRef Name;
>>> +    if (error(si->getName(Name))) continue;
>>> +
>>> +    if (Name.compare(".pdata") == 0) {
>>
>>
>> Name == ".pdata"
>>
>>> +      pdata = o->getCOFFSection(si);
>>> +      std::vector<RelocationRef> Rels;
>>> +      for (relocation_iterator ri = si->begin_relocations(),
>>> +                               re = si->end_relocations();
>>> +                               ri != re; ri.increment(ec)) {
>>> +        if (error(ec)) break;
>>> +        Rels.push_back(*ri);
>>> +      }
>>> +
>>> +      // Sort relocations by address.
>>> +      std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
>>> +
>>> +      ArrayRef<uint8_t> Contents;
>>> +      if (error(o->getSectionContents(pdata, Contents))) continue;
>>> +      if (Contents.empty()) continue;
>>> +
>>> +      unsigned i = 0;
>>> +      while ((Contents.size() - i) >= sizeof(RuntimeFunction)) {
>>
>>
>> Sizeof RuntimeFunction is not guaranteed in this case.
>>
>>> +        const RuntimeFunction* RF =
>>> +            reinterpret_cast<const RuntimeFunction*>(Contents.data() +
>>> i);
>>
>>
>> This is definitely wrong and will fail on lots of platforms. If you
>> would like to do this, use llvm/Support/Endian.h and see examples from
>> llvm/Object/COFF.h.
>>
>>> +
>>> +        outs() << "Function Table:\n";
>>> +
>>> +        outs() << "  Start Address: ";
>>> +        PrintCOFFSymbolAddress(outs(), Rels,
>>> +                               i + offsetof(RuntimeFunction,
>>> startAddress),
>>> +                               RF->startAddress);
>>> +        outs() << "\n";
>>> +
>>> +        outs() << "  End Address: ";
>>> +        PrintCOFFSymbolAddress(outs(), Rels,
>>> +                               i + offsetof(RuntimeFunction,
>>> endAddress),
>>> +                               RF->endAddress);
>>> +        outs() << "\n";
>>> +
>>> +        outs() << "  Unwind Info Address: ";
>>> +        PrintCOFFSymbolAddress(outs(), Rels,
>>> +                               i + offsetof(RuntimeFunction,
>>> unwindInfoOffset),
>>> +                               RF->unwindInfoOffset);
>>> +        outs() << "\n";
>>> +
>>> +        ArrayRef<uint8_t> XContents;
>>> +        uint64_t UnwindInfoOffset = 0;
>>> +        if (error(GetCOFFUnwindInfo(o, Rels,
>>> +                                    i + offsetof(RuntimeFunction,
>>> unwindInfoOffset),
>>
>>
>> 80-col
>>
>>> +                                    XContents, UnwindInfoOffset)))
>>> continue;
>>> +        if (XContents.empty()) continue;
>>> +
>>> +        UnwindInfoOffset += RF->unwindInfoOffset;
>>> +        if (UnwindInfoOffset > XContents.size()) continue;
>>> +
>>> +        const Win64EH::UnwindInfo* UI = reinterpret_cast<const
>>> Win64EH::UnwindInfo*>
>>> +                             (XContents.data() + UnwindInfoOffset);
>>
>>
>> This has the same problem as RuntimeFunction, but even worse. Layout
>> of bitfields is implementation defined. The proper way to do this is
>> to use Endian.h and manually unpack the bit-fields using functions.
>>
>>> +
>>> +        outs() << "  Version: " << (int) UI->version << "\n";
>>> +        outs() << "  Flags: " << (int) UI->flags;
>>> +        if (UI->flags) {
>>> +            if (UI->flags & UNW_ExceptionHandler) outs() << "
>>> UNW_ExceptionHandler";
>>> +            if (UI->flags & UNW_TerminateHandler) outs() << "
>>> UNW_TerminateHandler";
>>> +            if (UI->flags & UNW_ChainInfo) outs() << " UNW_ChainInfo";
>>> +        }
>>> +        outs() << "\n";
>>> +        outs() << "  Size of prolog: " << (int) UI->prologSize << "\n";
>>> +        outs() << "  Number of Codes: " << (int) UI->numCodes << "\n";
>>> +        // Maybe this should move to output of UOP_SetFPReg?
>>> +        if (UI->frameRegister) {
>>> +          outs() << "  Frame register: "
>>> +                 << GetCOFFUnwindRegisterName(UI->frameRegister) <<
>>> "\n";
>>> +          outs() << "  Frame offset: " << 16 * (int) UI->frameOffset <<
>>> "\n";
>>> +        }
>>> +        else {
>>> +          outs() << "  No frame pointer used\n";
>>> +        }
>>> +        if (UI->flags & (UNW_ExceptionHandler | UNW_TerminateHandler)) {
>>> +          // FIXME: Output exception handler data
>>> +        }
>>> +        else if (UI->flags & UNW_ChainInfo) {
>>> +          // FIXME: Output chained unwind info
>>> +        }
>>> +
>>> +        if (UI->numCodes)
>>> +          outs() << "  Unwind Codes:\n";
>>> +
>>> +        PrintCOFFUnwindCode(&UI->unwindCodes[0], UI->numCodes);
>>> +
>>> +        outs() << "\n\n";
>>> +        outs().flush();
>>> +
>>> +        i += sizeof(RuntimeFunction);
>>> +      }
>>> +      continue;
>>> +    }
>>> +  }
>>> +}
>>
>>
>> - Michael Spencer
>>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>



More information about the llvm-commits mailing list