[llvm] r183424 - Teach llvm-objdump with the -macho parser how to use the data in code table

Jim Grosbach grosbach at apple.com
Thu Jun 6 13:06:50 PDT 2013


Excellent!

The test file should be in the test/Object/ARM subdirectory, though, not X86.

-Jim

On Jun 6, 2013, at 10:20 AM, Kevin Enderby <enderby at apple.com> wrote:

> Author: enderby
> Date: Thu Jun  6 12:20:50 2013
> New Revision: 183424
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=183424&view=rev
> Log:
> Teach llvm-objdump with the -macho parser how to use the data in code table
> from the LC_DATA_IN_CODE load command.  And when disassembling print
> the data in code formatted for the kind of data it is, and do not disassemble those
> bytes.
> 
> I added the format specific functionality to the derived class MachOObjectFile
> since these tables only appear in Mach-O object files. This is my first
> attempt to modify the libObject stuff so if folks have better suggestions
> how to fit this in or suggestions on the implementation please let me know.
> 
> rdar://11791371
> 
> Added:
>    llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7   (with props)
>    llvm/trunk/test/Object/X86/macho-data-in-code.test
> Modified:
>    llvm/trunk/include/llvm/Object/MachO.h
>    llvm/trunk/lib/Object/MachOObjectFile.cpp
>    llvm/trunk/tools/llvm-objdump/MachODump.cpp
>    llvm/trunk/tools/macho-dump/macho-dump.cpp
> 
> Modified: llvm/trunk/include/llvm/Object/MachO.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/MachO.h?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Object/MachO.h (original)
> +++ llvm/trunk/include/llvm/Object/MachO.h Thu Jun  6 12:20:50 2013
> @@ -25,6 +25,31 @@
> namespace llvm {
> namespace object {
> 
> +/// DiceRef - This is a value type class that represents a single
> +/// data in code entry in the table in a Mach-O object file.
> +class DiceRef {
> +  DataRefImpl DicePimpl;
> +  const ObjectFile *OwningObject;
> +
> +public:
> +  DiceRef() : OwningObject(NULL) { }
> +
> +  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
> +
> +  bool operator==(const DiceRef &Other) const;
> +  bool operator<(const DiceRef &Other) const;
> +
> +  error_code getNext(DiceRef &Result) const;
> +
> +  error_code getOffset(uint32_t &Result) const;
> +  error_code getLength(uint16_t &Result) const;
> +  error_code getKind(uint16_t &Result) const;
> +
> +  DataRefImpl getRawDataRefImpl() const;
> +  const ObjectFile *getObjectFile() const;
> +};
> +typedef content_iterator<DiceRef> dice_iterator;
> +
> class MachOObjectFile : public ObjectFile {
> public:
>   struct LoadCommandInfo {
> @@ -108,6 +133,9 @@ public:
>   relocation_iterator getSectionRelBegin(unsigned Index) const;
>   relocation_iterator getSectionRelEnd(unsigned Index) const;
> 
> +  dice_iterator begin_dices() const;
> +  dice_iterator end_dices() const;
> +
>   // In a MachO file, sections have a segment name. This is used in the .o
>   // files. They have a single segment, but this field specifies which segment
>   // a section should be put in in the final object.
> @@ -152,6 +180,7 @@ public:
>   getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
> 
>   macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
> +  macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
>   macho::Header getHeader() const;
>   macho::Header64Ext getHeader64Ext() const;
>   macho::IndirectSymbolTableEntry
> @@ -161,6 +190,7 @@ public:
>                                                       unsigned Index) const;
>   macho::SymtabLoadCommand getSymtabLoadCommand() const;
>   macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
> +  macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
> 
>   StringRef getStringTableData() const;
>   bool is64Bit() const;
> @@ -175,8 +205,66 @@ private:
>   SectionList Sections;
>   const char *SymtabLoadCmd;
>   const char *DysymtabLoadCmd;
> +  const char *DataInCodeLoadCmd;
> };
> 
> +/// DiceRef
> +inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
> +  : DicePimpl(DiceP) , OwningObject(Owner) {}
> +
> +inline bool DiceRef::operator==(const DiceRef &Other) const {
> +  return DicePimpl == Other.DicePimpl;
> +}
> +
> +inline bool DiceRef::operator<(const DiceRef &Other) const {
> +  return DicePimpl < Other.DicePimpl;
> +}
> +
> +inline error_code DiceRef::getNext(DiceRef &Result) const {
> +  DataRefImpl Rel = DicePimpl;
> +  const macho::DataInCodeTableEntry *P =
> +    reinterpret_cast<const macho::DataInCodeTableEntry *>(Rel.p);
> +  Rel.p = reinterpret_cast<uintptr_t>(P + 1);
> +  Result = DiceRef(Rel, OwningObject);
> +  return object_error::success;
> +}
> +
> +// Since a Mach-O data in code reference, a DiceRef, can only be created when
> +// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
> +// the methods that get the values of the fields of the reference.
> +
> +inline error_code DiceRef::getOffset(uint32_t &Result) const {
> +  const MachOObjectFile *MachOOF =
> +    static_cast<const MachOObjectFile *>(OwningObject);
> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> +  Result = Dice.Offset;
> +  return object_error::success;
> +}
> +
> +inline error_code DiceRef::getLength(uint16_t &Result) const {
> +  const MachOObjectFile *MachOOF =
> +    static_cast<const MachOObjectFile *>(OwningObject);
> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> +  Result = Dice.Length;
> +  return object_error::success;
> +}
> +
> +inline error_code DiceRef::getKind(uint16_t &Result) const {
> +  const MachOObjectFile *MachOOF =
> +    static_cast<const MachOObjectFile *>(OwningObject);
> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> +  Result = Dice.Kind;
> +  return object_error::success;
> +}
> +
> +inline DataRefImpl DiceRef::getRawDataRefImpl() const {
> +  return DicePimpl;
> +}
> +
> +inline const ObjectFile *DiceRef::getObjectFile() const {
> +  return OwningObject;
> +}
> +
> }
> }
> 
> 
> Modified: llvm/trunk/lib/Object/MachOObjectFile.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/MachOObjectFile.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Object/MachOObjectFile.cpp (original)
> +++ llvm/trunk/lib/Object/MachOObjectFile.cpp Thu Jun  6 12:20:50 2013
> @@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryB
>                                  bool IsLittleEndian, bool Is64bits,
>                                  error_code &ec)
>     : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
> -      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
> +      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
>   uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
>   macho::LoadCommandType SegmentLoadType = is64Bit() ?
>     macho::LCT_Segment64 : macho::LCT_Segment;
> @@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryB
>     } else if (Load.C.Type == macho::LCT_Dysymtab) {
>       assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
>       DysymtabLoadCmd = Load.Ptr;
> +    } else if (Load.C.Type == macho::LCT_DataInCode) {
> +      assert(!DataInCodeLoadCmd && "Multiple data in code tables");
> +      DataInCodeLoadCmd = Load.Ptr;
>     } else if (Load.C.Type == SegmentLoadType) {
>       uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
>       for (unsigned J = 0; J < NumSections; ++J) {
> @@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::get
>   return getSectionRelEnd(DRI);
> }
> 
> +dice_iterator MachOObjectFile::begin_dices() const {
> +  DataRefImpl DRI;
> +  if (!DataInCodeLoadCmd)
> +    return dice_iterator(DiceRef(DRI, this));
> +
> +  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
> +  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
> +  return dice_iterator(DiceRef(DRI, this));
> +}
> +
> +dice_iterator MachOObjectFile::end_dices() const {
> +  DataRefImpl DRI;
> +  if (!DataInCodeLoadCmd)
> +    return dice_iterator(DiceRef(DRI, this));
> +
> +  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
> +  unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
> +  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
> +  return dice_iterator(DiceRef(DRI, this));
> +}
> +
> StringRef
> MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
>   ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
> @@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefIm
>   return getStruct<macho::RelocationEntry>(this, P);
> }
> 
> +macho::DataInCodeTableEntry
> +MachOObjectFile::getDice(DataRefImpl Rel) const {
> +  const char *P = reinterpret_cast<const char *>(Rel.p);
> +  return getStruct<macho::DataInCodeTableEntry>(this, P);
> +}
> +
> macho::Header MachOObjectFile::getHeader() const {
>   return getStruct<macho::Header>(this, getPtr(this, 0));
> }
> @@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFi
>   return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
> }
> 
> +macho::LinkeditDataLoadCommand
> +MachOObjectFile::getDataInCodeLoadCommand() const {
> +  if (DataInCodeLoadCmd)
> +    return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
> +
> +  // If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
> +  macho::LinkeditDataLoadCommand Cmd;
> +  Cmd.Type = macho::LCT_DataInCode;
> +  Cmd.Size = macho::LinkeditLoadCommandSize;
> +  Cmd.DataOffset = 0;
> +  Cmd.DataSize = 0;
> +  return Cmd;
> +}
> +
> StringRef MachOObjectFile::getStringTableData() const {
>   macho::SymtabLoadCommand S = getSymtabLoadCommand();
>   return getData().substr(S.StringTableOffset, S.StringTableSize);
> 
> Added: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7?rev=183424&view=auto
> ==============================================================================
> Binary file - no diff available.
> 
> Propchange: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
> ------------------------------------------------------------------------------
>    svn:mime-type = application/octet-stream
> 
> Added: llvm/trunk/test/Object/X86/macho-data-in-code.test
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/macho-data-in-code.test?rev=183424&view=auto
> ==============================================================================
> --- llvm/trunk/test/Object/X86/macho-data-in-code.test (added)
> +++ llvm/trunk/test/Object/X86/macho-data-in-code.test Thu Jun  6 12:20:50 2013
> @@ -0,0 +1,7 @@
> +RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
> +
> +CHECK:      12:	80 bd                                        	pop	{r7, pc}
> +
> +CHECK:      14:	38 00 00 00                                  	.long 56	@ KIND_DATA
> +CHECK:      16:	00 00                                        	movs	r0, r0
> +
> 
> Modified: llvm/trunk/tools/llvm-objdump/MachODump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MachODump.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/MachODump.cpp (original)
> +++ llvm/trunk/tools/llvm-objdump/MachODump.cpp Thu Jun  6 12:20:50 2013
> @@ -87,12 +87,73 @@ struct SymbolSorter {
>   }
> };
> 
> +// Types for the storted data in code table that is built before disassembly
> +// and the predicate function to sort them.
> +typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
> +typedef std::vector<DiceTableEntry> DiceTable;
> +typedef DiceTable::iterator dice_table_iterator;
> +
> +static bool
> +compareDiceTableEntries(const DiceTableEntry i,
> +                        const DiceTableEntry j) {
> +  return i.first == j.first;
> +}
> +
> +static void DumpDataInCode(const char *bytes, uint64_t Size,
> +                           unsigned short Kind) {
> +  uint64_t Value;
> +
> +  switch (Kind) {
> +  case macho::Data:
> +    switch (Size) {
> +    case 4:
> +      Value = bytes[3] << 24 |
> +              bytes[2] << 16 |
> +              bytes[1] << 8 |
> +              bytes[0];
> +      outs() << "\t.long " << Value;
> +      break;
> +    case 2:
> +      Value = bytes[1] << 8 |
> +              bytes[0];
> +      outs() << "\t.short " << Value;
> +      break;
> +    case 1:
> +      Value = bytes[0];
> +      outs() << "\t.byte " << Value;
> +      break;
> +    }
> +    outs() << "\t@ KIND_DATA\n";
> +    break;
> +  case macho::JumpTable8:
> +    Value = bytes[0];
> +    outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
> +    break;
> +  case macho::JumpTable16:
> +    Value = bytes[1] << 8 |
> +            bytes[0];
> +    outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
> +    break;
> +  case macho::JumpTable32:
> +    Value = bytes[3] << 24 |
> +            bytes[2] << 16 |
> +            bytes[1] << 8 |
> +            bytes[0];
> +    outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
> +    break;
> +  default:
> +    outs() << "\t@ data in code kind = " << Kind << "\n";
> +    break;
> +  }
> +}
> +
> static void
> getSectionsAndSymbols(const macho::Header Header,
>                       MachOObjectFile *MachOObj,
>                       std::vector<SectionRef> &Sections,
>                       std::vector<SymbolRef> &Symbols,
> -                      SmallVectorImpl<uint64_t> &FoundFns) {
> +                      SmallVectorImpl<uint64_t> &FoundFns,
> +                      uint64_t &BaseSegmentAddress) {
>   error_code ec;
>   for (symbol_iterator SI = MachOObj->begin_symbols(),
>        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
> @@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Heade
> 
>   MachOObjectFile::LoadCommandInfo Command =
>     MachOObj->getFirstLoadCommandInfo();
> +  bool BaseSegmentAddressSet = false;
>   for (unsigned i = 0; ; ++i) {
>     if (Command.C.Type == macho::LCT_FunctionStarts) {
>       // We found a function starts segment, parse the addresses for later
> @@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Heade
> 
>       MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
>     }
> +    else if (Command.C.Type == macho::LCT_Segment) {
> +      macho::SegmentLoadCommand SLC =
> +        MachOObj->getSegmentLoadCommand(Command);
> +      StringRef SegName = SLC.Name;
> +      if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
> +        BaseSegmentAddressSet = true;
> +        BaseSegmentAddress = SLC.VMAddress;
> +      }
> +    }
> 
>     if (i == Header.NumLoadCommands - 1)
>       break;
> @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(Strin
>   std::vector<SectionRef> Sections;
>   std::vector<SymbolRef> Symbols;
>   SmallVector<uint64_t, 8> FoundFns;
> +  uint64_t BaseSegmentAddress;
> 
> -  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
> +  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
> +                        BaseSegmentAddress);
> 
>   // Make a copy of the unsorted symbol list. FIXME: duplication
>   std::vector<SymbolRef> UnsortedSymbols(Symbols);
>   // Sort the symbols by address, just in case they didn't come in that way.
>   std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
> 
> +  // Build a data in code table that is sorted on by the address of each entry.
> +  uint64_t BaseAddress = 0;
> +  if (Header.FileType == macho::HFT_Object)
> +    Sections[0].getAddress(BaseAddress);
> +  else
> +    BaseAddress = BaseSegmentAddress;
> +  DiceTable Dices;
> +  error_code ec;
> +  for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
> +       DI != DE; DI.increment(ec)){
> +    uint32_t Offset;
> +    DI->getOffset(Offset);
> +    Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
> +  }
> +  array_pod_sort(Dices.begin(), Dices.end());
> +
> #ifndef NDEBUG
>   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
> #else
> @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(Strin
>       for (uint64_t Index = Start; Index < End; Index += Size) {
>         MCInst Inst;
> 
> +        uint64_t SectAddress = 0;
> +        Sections[SectIdx].getAddress(SectAddress);
> +        outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
> +
> +        // Check the data in code table here to see if this is data not an
> +        // instruction to be disassembled.
> +        DiceTable Dice;
> +        Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
> +        dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
> +                                              Dice.begin(), Dice.end(),
> +                                              compareDiceTableEntries);
> +        if (DTI != Dices.end()){
> +          uint16_t Length;
> +          DTI->second.getLength(Length);
> +          DumpBytes(StringRef(Bytes.data() + Index, Length));
> +          uint16_t Kind;
> +          DTI->second.getKind(Kind);
> +          DumpDataInCode(Bytes.data() + Index, Length, Kind);
> +          continue;
> +        }
> +
>         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
>                                    DebugOut, nulls())) {
> -          uint64_t SectAddress = 0;
> -          Sections[SectIdx].getAddress(SectAddress);
> -          outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
> -
>           DumpBytes(StringRef(Bytes.data() + Index, Size));
>           IP->printInst(&Inst, outs(), "");
> 
> 
> Modified: llvm/trunk/tools/macho-dump/macho-dump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/macho-dump/macho-dump.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/tools/macho-dump/macho-dump.cpp (original)
> +++ llvm/trunk/tools/macho-dump/macho-dump.cpp Thu Jun  6 12:20:50 2013
> @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObj
>          << "  ('datasize', " << LLC.DataSize << ")\n"
>          << "  ('_data_regions', [\n";
> 
> -  unsigned NumRegions = LLC.DataSize / 8;
> +  unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
>   for (unsigned i = 0; i < NumRegions; ++i) {
>     macho::DataInCodeTableEntry DICE =
>       Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130606/92c3b09b/attachment.html>


More information about the llvm-commits mailing list