[llvm] r183424 - Teach llvm-objdump with the -macho parser how to use the data in code table

Kevin Enderby enderby at apple.com
Thu Jun 6 13:30:14 PDT 2013


You are correct, that test is using an ARM binary.  Fixed with r183442.

Kev

On Jun 6, 2013, at 1:06 PM, Jim Grosbach <grosbach at apple.com> wrote:

> Excellent!
> 
> The test file should be in the test/Object/ARM subdirectory, though, not X86.
> 
> -Jim
> 
> On Jun 6, 2013, at 10:20 AM, Kevin Enderby <enderby at apple.com> wrote:
> 
>> Author: enderby
>> Date: Thu Jun  6 12:20:50 2013
>> New Revision: 183424
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=183424&view=rev
>> Log:
>> Teach llvm-objdump with the -macho parser how to use the data in code table
>> from the LC_DATA_IN_CODE load command.  And when disassembling, print
>> the data in code formatted for the kind of data it is and do not disassemble
>> those bytes.
>> 
>> I added the format specific functionality to the derived class MachOObjectFile
>> since these tables only appear in Mach-O object files. This is my first
>> attempt to modify the libObject stuff so if folks have better suggestions
>> how to fit this in or suggestions on the implementation please let me know.
>> 
>> rdar://11791371
>> 
>> Added:
>>    llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7   (with props)
>>    llvm/trunk/test/Object/X86/macho-data-in-code.test
>> Modified:
>>    llvm/trunk/include/llvm/Object/MachO.h
>>    llvm/trunk/lib/Object/MachOObjectFile.cpp
>>    llvm/trunk/tools/llvm-objdump/MachODump.cpp
>>    llvm/trunk/tools/macho-dump/macho-dump.cpp
>> 
>> Modified: llvm/trunk/include/llvm/Object/MachO.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/MachO.h?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Object/MachO.h (original)
>> +++ llvm/trunk/include/llvm/Object/MachO.h Thu Jun  6 12:20:50 2013
>> @@ -25,6 +25,31 @@
>> namespace llvm {
>> namespace object {
>> 
>> +/// DiceRef - This is a value type class that represents a single
>> +/// data in code entry in the table in a Mach-O object file.
>> +class DiceRef {
>> +  DataRefImpl DicePimpl;
>> +  const ObjectFile *OwningObject;
>> +
>> +public:
>> +  DiceRef() : OwningObject(NULL) { }
>> +
>> +  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
>> +
>> +  bool operator==(const DiceRef &Other) const;
>> +  bool operator<(const DiceRef &Other) const;
>> +
>> +  error_code getNext(DiceRef &Result) const;
>> +
>> +  error_code getOffset(uint32_t &Result) const;
>> +  error_code getLength(uint16_t &Result) const;
>> +  error_code getKind(uint16_t &Result) const;
>> +
>> +  DataRefImpl getRawDataRefImpl() const;
>> +  const ObjectFile *getObjectFile() const;
>> +};
>> +typedef content_iterator<DiceRef> dice_iterator;
>> +
>> class MachOObjectFile : public ObjectFile {
>> public:
>>   struct LoadCommandInfo {
>> @@ -108,6 +133,9 @@ public:
>>   relocation_iterator getSectionRelBegin(unsigned Index) const;
>>   relocation_iterator getSectionRelEnd(unsigned Index) const;
>> 
>> +  dice_iterator begin_dices() const;
>> +  dice_iterator end_dices() const;
>> +
>>   // In a MachO file, sections have a segment name. This is used in the .o
>>   // files. They have a single segment, but this field specifies which segment
>>   // a section should be put in in the final object.
>> @@ -152,6 +180,7 @@ public:
>>   getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
>> 
>>   macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
>> +  macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
>>   macho::Header getHeader() const;
>>   macho::Header64Ext getHeader64Ext() const;
>>   macho::IndirectSymbolTableEntry
>> @@ -161,6 +190,7 @@ public:
>>                                                       unsigned Index) const;
>>   macho::SymtabLoadCommand getSymtabLoadCommand() const;
>>   macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
>> +  macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
>> 
>>   StringRef getStringTableData() const;
>>   bool is64Bit() const;
>> @@ -175,8 +205,66 @@ private:
>>   SectionList Sections;
>>   const char *SymtabLoadCmd;
>>   const char *DysymtabLoadCmd;
>> +  const char *DataInCodeLoadCmd;
>> };
>> 
>> +/// DiceRef
>> +inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
>> +  : DicePimpl(DiceP) , OwningObject(Owner) {}
>> +
>> +inline bool DiceRef::operator==(const DiceRef &Other) const {
>> +  return DicePimpl == Other.DicePimpl;
>> +}
>> +
>> +inline bool DiceRef::operator<(const DiceRef &Other) const {
>> +  return DicePimpl < Other.DicePimpl;
>> +}
>> +
>> +inline error_code DiceRef::getNext(DiceRef &Result) const {
>> +  DataRefImpl Rel = DicePimpl;
>> +  const macho::DataInCodeTableEntry *P =
>> +    reinterpret_cast<const macho::DataInCodeTableEntry *>(Rel.p);
>> +  Rel.p = reinterpret_cast<uintptr_t>(P + 1);
>> +  Result = DiceRef(Rel, OwningObject);
>> +  return object_error::success;
>> +}
>> +
>> +// Since a Mach-O data in code reference, a DiceRef, can only be created when
>> +// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
>> +// the methods that get the values of the fields of the reference.
>> +
>> +inline error_code DiceRef::getOffset(uint32_t &Result) const {
>> +  const MachOObjectFile *MachOOF =
>> +    static_cast<const MachOObjectFile *>(OwningObject);
>> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> +  Result = Dice.Offset;
>> +  return object_error::success;
>> +}
>> +
>> +inline error_code DiceRef::getLength(uint16_t &Result) const {
>> +  const MachOObjectFile *MachOOF =
>> +    static_cast<const MachOObjectFile *>(OwningObject);
>> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> +  Result = Dice.Length;
>> +  return object_error::success;
>> +}
>> +
>> +inline error_code DiceRef::getKind(uint16_t &Result) const {
>> +  const MachOObjectFile *MachOOF =
>> +    static_cast<const MachOObjectFile *>(OwningObject);
>> +  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> +  Result = Dice.Kind;
>> +  return object_error::success;
>> +}
>> +
>> +inline DataRefImpl DiceRef::getRawDataRefImpl() const {
>> +  return DicePimpl;
>> +}
>> +
>> +inline const ObjectFile *DiceRef::getObjectFile() const {
>> +  return OwningObject;
>> +}
>> +
>> }
>> }
>> 
>> 
>> Modified: llvm/trunk/lib/Object/MachOObjectFile.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/MachOObjectFile.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Object/MachOObjectFile.cpp (original)
>> +++ llvm/trunk/lib/Object/MachOObjectFile.cpp Thu Jun  6 12:20:50 2013
>> @@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryB
>>                                  bool IsLittleEndian, bool Is64bits,
>>                                  error_code &ec)
>>     : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
>> -      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
>> +      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
>>   uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
>>   macho::LoadCommandType SegmentLoadType = is64Bit() ?
>>     macho::LCT_Segment64 : macho::LCT_Segment;
>> @@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryB
>>     } else if (Load.C.Type == macho::LCT_Dysymtab) {
>>       assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
>>       DysymtabLoadCmd = Load.Ptr;
>> +    } else if (Load.C.Type == macho::LCT_DataInCode) {
>> +      assert(!DataInCodeLoadCmd && "Multiple data in code tables");
>> +      DataInCodeLoadCmd = Load.Ptr;
>>     } else if (Load.C.Type == SegmentLoadType) {
>>       uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
>>       for (unsigned J = 0; J < NumSections; ++J) {
>> @@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::get
>>   return getSectionRelEnd(DRI);
>> }
>> 
>> +dice_iterator MachOObjectFile::begin_dices() const {
>> +  DataRefImpl DRI;
>> +  if (!DataInCodeLoadCmd)
>> +    return dice_iterator(DiceRef(DRI, this));
>> +
>> +  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
>> +  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
>> +  return dice_iterator(DiceRef(DRI, this));
>> +}
>> +
>> +dice_iterator MachOObjectFile::end_dices() const {
>> +  DataRefImpl DRI;
>> +  if (!DataInCodeLoadCmd)
>> +    return dice_iterator(DiceRef(DRI, this));
>> +
>> +  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
>> +  unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
>> +  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
>> +  return dice_iterator(DiceRef(DRI, this));
>> +}
>> +
>> StringRef
>> MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
>>   ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
>> @@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefIm
>>   return getStruct<macho::RelocationEntry>(this, P);
>> }
>> 
>> +macho::DataInCodeTableEntry
>> +MachOObjectFile::getDice(DataRefImpl Rel) const {
>> +  const char *P = reinterpret_cast<const char *>(Rel.p);
>> +  return getStruct<macho::DataInCodeTableEntry>(this, P);
>> +}
>> +
>> macho::Header MachOObjectFile::getHeader() const {
>>   return getStruct<macho::Header>(this, getPtr(this, 0));
>> }
>> @@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFi
>>   return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
>> }
>> 
>> +macho::LinkeditDataLoadCommand
>> +MachOObjectFile::getDataInCodeLoadCommand() const {
>> +  if (DataInCodeLoadCmd)
>> +    return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
>> +
>> +  // If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
>> +  macho::LinkeditDataLoadCommand Cmd;
>> +  Cmd.Type = macho::LCT_DataInCode;
>> +  Cmd.Size = macho::LinkeditLoadCommandSize;
>> +  Cmd.DataOffset = 0;
>> +  Cmd.DataSize = 0;
>> +  return Cmd;
>> +}
>> +
>> StringRef MachOObjectFile::getStringTableData() const {
>>   macho::SymtabLoadCommand S = getSymtabLoadCommand();
>>   return getData().substr(S.StringTableOffset, S.StringTableSize);
>> 
>> Added: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7?rev=183424&view=auto
>> ==============================================================================
>> Binary file - no diff available.
>> 
>> Propchange: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
>> ------------------------------------------------------------------------------
>>    svn:mime-type = application/octet-stream
>> 
>> Added: llvm/trunk/test/Object/X86/macho-data-in-code.test
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/macho-data-in-code.test?rev=183424&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Object/X86/macho-data-in-code.test (added)
>> +++ llvm/trunk/test/Object/X86/macho-data-in-code.test Thu Jun  6 12:20:50 2013
>> @@ -0,0 +1,7 @@
>> +RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
>> +
>> +CHECK:      12:	80 bd                                        	pop	{r7, pc}
>> +
>> +CHECK:      14:	38 00 00 00                                  	.long 56	@ KIND_DATA
>> +CHECK:      16:	00 00                                        	movs	r0, r0
>> +
>> 
>> Modified: llvm/trunk/tools/llvm-objdump/MachODump.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MachODump.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/tools/llvm-objdump/MachODump.cpp (original)
>> +++ llvm/trunk/tools/llvm-objdump/MachODump.cpp Thu Jun  6 12:20:50 2013
>> @@ -87,12 +87,73 @@ struct SymbolSorter {
>>   }
>> };
>> 
>> +// Types for the sorted data in code table that is built before disassembly
>> +// and the predicate function to sort them.
>> +typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
>> +typedef std::vector<DiceTableEntry> DiceTable;
>> +typedef DiceTable::iterator dice_table_iterator;
>> +
>> +static bool
>> +compareDiceTableEntries(const DiceTableEntry i,
>> +                        const DiceTableEntry j) {
>> +  return i.first == j.first;
>> +}
>> +
>> +static void DumpDataInCode(const char *bytes, uint64_t Size,
>> +                           unsigned short Kind) {
>> +  uint64_t Value;
>> +
>> +  switch (Kind) {
>> +  case macho::Data:
>> +    switch (Size) {
>> +    case 4:
>> +      Value = bytes[3] << 24 |
>> +              bytes[2] << 16 |
>> +              bytes[1] << 8 |
>> +              bytes[0];
>> +      outs() << "\t.long " << Value;
>> +      break;
>> +    case 2:
>> +      Value = bytes[1] << 8 |
>> +              bytes[0];
>> +      outs() << "\t.short " << Value;
>> +      break;
>> +    case 1:
>> +      Value = bytes[0];
>> +      outs() << "\t.byte " << Value;
>> +      break;
>> +    }
>> +    outs() << "\t@ KIND_DATA\n";
>> +    break;
>> +  case macho::JumpTable8:
>> +    Value = bytes[0];
>> +    outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
>> +    break;
>> +  case macho::JumpTable16:
>> +    Value = bytes[1] << 8 |
>> +            bytes[0];
>> +    outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
>> +    break;
>> +  case macho::JumpTable32:
>> +    Value = bytes[3] << 24 |
>> +            bytes[2] << 16 |
>> +            bytes[1] << 8 |
>> +            bytes[0];
>> +    outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
>> +    break;
>> +  default:
>> +    outs() << "\t@ data in code kind = " << Kind << "\n";
>> +    break;
>> +  }
>> +}
>> +
>> static void
>> getSectionsAndSymbols(const macho::Header Header,
>>                       MachOObjectFile *MachOObj,
>>                       std::vector<SectionRef> &Sections,
>>                       std::vector<SymbolRef> &Symbols,
>> -                      SmallVectorImpl<uint64_t> &FoundFns) {
>> +                      SmallVectorImpl<uint64_t> &FoundFns,
>> +                      uint64_t &BaseSegmentAddress) {
>>   error_code ec;
>>   for (symbol_iterator SI = MachOObj->begin_symbols(),
>>        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
>> @@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Heade
>> 
>>   MachOObjectFile::LoadCommandInfo Command =
>>     MachOObj->getFirstLoadCommandInfo();
>> +  bool BaseSegmentAddressSet = false;
>>   for (unsigned i = 0; ; ++i) {
>>     if (Command.C.Type == macho::LCT_FunctionStarts) {
>>       // We found a function starts segment, parse the addresses for later
>> @@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Heade
>> 
>>       MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
>>     }
>> +    else if (Command.C.Type == macho::LCT_Segment) {
>> +      macho::SegmentLoadCommand SLC =
>> +        MachOObj->getSegmentLoadCommand(Command);
>> +      StringRef SegName = SLC.Name;
>> +      if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
>> +        BaseSegmentAddressSet = true;
>> +        BaseSegmentAddress = SLC.VMAddress;
>> +      }
>> +    }
>> 
>>     if (i == Header.NumLoadCommands - 1)
>>       break;
>> @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(Strin
>>   std::vector<SectionRef> Sections;
>>   std::vector<SymbolRef> Symbols;
>>   SmallVector<uint64_t, 8> FoundFns;
>> +  uint64_t BaseSegmentAddress;
>> 
>> -  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
>> +  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
>> +                        BaseSegmentAddress);
>> 
>>   // Make a copy of the unsorted symbol list. FIXME: duplication
>>   std::vector<SymbolRef> UnsortedSymbols(Symbols);
>>   // Sort the symbols by address, just in case they didn't come in that way.
>>   std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
>> 
>> +  // Build a data in code table that is sorted by the address of each entry.
>> +  uint64_t BaseAddress = 0;
>> +  if (Header.FileType == macho::HFT_Object)
>> +    Sections[0].getAddress(BaseAddress);
>> +  else
>> +    BaseAddress = BaseSegmentAddress;
>> +  DiceTable Dices;
>> +  error_code ec;
>> +  for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
>> +       DI != DE; DI.increment(ec)){
>> +    uint32_t Offset;
>> +    DI->getOffset(Offset);
>> +    Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
>> +  }
>> +  array_pod_sort(Dices.begin(), Dices.end());
>> +
>> #ifndef NDEBUG
>>   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
>> #else
>> @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(Strin
>>       for (uint64_t Index = Start; Index < End; Index += Size) {
>>         MCInst Inst;
>> 
>> +        uint64_t SectAddress = 0;
>> +        Sections[SectIdx].getAddress(SectAddress);
>> +        outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
>> +
>> +        // Check the data in code table here to see if this is data not an
>> +        // instruction to be disassembled.
>> +        DiceTable Dice;
>> +        Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
>> +        dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
>> +                                              Dice.begin(), Dice.end(),
>> +                                              compareDiceTableEntries);
>> +        if (DTI != Dices.end()){
>> +          uint16_t Length;
>> +          DTI->second.getLength(Length);
>> +          DumpBytes(StringRef(Bytes.data() + Index, Length));
>> +          uint16_t Kind;
>> +          DTI->second.getKind(Kind);
>> +          DumpDataInCode(Bytes.data() + Index, Length, Kind);
>> +          continue;
>> +        }
>> +
>>         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
>>                                    DebugOut, nulls())) {
>> -          uint64_t SectAddress = 0;
>> -          Sections[SectIdx].getAddress(SectAddress);
>> -          outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
>> -
>>           DumpBytes(StringRef(Bytes.data() + Index, Size));
>>           IP->printInst(&Inst, outs(), "");
>> 
>> 
>> Modified: llvm/trunk/tools/macho-dump/macho-dump.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/macho-dump/macho-dump.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/tools/macho-dump/macho-dump.cpp (original)
>> +++ llvm/trunk/tools/macho-dump/macho-dump.cpp Thu Jun  6 12:20:50 2013
>> @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObj
>>          << "  ('datasize', " << LLC.DataSize << ")\n"
>>          << "  ('_data_regions', [\n";
>> 
>> -  unsigned NumRegions = LLC.DataSize / 8;
>> +  unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
>>   for (unsigned i = 0; i < NumRegions; ++i) {
>>     macho::DataInCodeTableEntry DICE =
>>       Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130606/fbc3fe03/attachment.html>


More information about the llvm-commits mailing list