[llvm] r183424 - Teach llvm-objdump with the -macho parser how to use the data in code table
Kevin Enderby
enderby at apple.com
Thu Jun 6 13:30:14 PDT 2013
You are correct, that test is using an ARM binary. Fixed with r183442.
Kev
On Jun 6, 2013, at 1:06 PM, Jim Grosbach <grosbach at apple.com> wrote:
> Excellent!
>
> The test file should be in the test/Object/ARM subdirectory, though, not X86.
>
> -Jim
>
> On Jun 6, 2013, at 10:20 AM, Kevin Enderby <enderby at apple.com> wrote:
>
>> Author: enderby
>> Date: Thu Jun 6 12:20:50 2013
>> New Revision: 183424
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=183424&view=rev
>> Log:
>> Teach llvm-objdump with the -macho parser how to use the data in code table
>> from the LC_DATA_IN_CODE load command. And when disassembling print
>> the data in code formatted for the kind of data it is, and do not disassemble those
>> bytes.
>>
>> I added the format specific functionality to the derived class MachOObjectFile
>> since these tables only appear in Mach-O object files. This is my first
>> attempt to modify the libObject stuff so if folks have better suggestions
>> how to fit this in or suggestions on the implementation please let me know.
>>
>> rdar://11791371
>>
>> Added:
>> llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7 (with props)
>> llvm/trunk/test/Object/X86/macho-data-in-code.test
>> Modified:
>> llvm/trunk/include/llvm/Object/MachO.h
>> llvm/trunk/lib/Object/MachOObjectFile.cpp
>> llvm/trunk/tools/llvm-objdump/MachODump.cpp
>> llvm/trunk/tools/macho-dump/macho-dump.cpp
>>
>> Modified: llvm/trunk/include/llvm/Object/MachO.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/MachO.h?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Object/MachO.h (original)
>> +++ llvm/trunk/include/llvm/Object/MachO.h Thu Jun 6 12:20:50 2013
>> @@ -25,6 +25,31 @@
>> namespace llvm {
>> namespace object {
>>
>> +/// DiceRef - This is a value type class that represents a single
>> +/// data in code entry in the table in a Mach-O object file.
>> +class DiceRef {
>> + DataRefImpl DicePimpl;
>> + const ObjectFile *OwningObject;
>> +
>> +public:
>> + DiceRef() : OwningObject(NULL) { }
>> +
>> + DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
>> +
>> + bool operator==(const DiceRef &Other) const;
>> + bool operator<(const DiceRef &Other) const;
>> +
>> + error_code getNext(DiceRef &Result) const;
>> +
>> + error_code getOffset(uint32_t &Result) const;
>> + error_code getLength(uint16_t &Result) const;
>> + error_code getKind(uint16_t &Result) const;
>> +
>> + DataRefImpl getRawDataRefImpl() const;
>> + const ObjectFile *getObjectFile() const;
>> +};
>> +typedef content_iterator<DiceRef> dice_iterator;
>> +
>> class MachOObjectFile : public ObjectFile {
>> public:
>> struct LoadCommandInfo {
>> @@ -108,6 +133,9 @@ public:
>> relocation_iterator getSectionRelBegin(unsigned Index) const;
>> relocation_iterator getSectionRelEnd(unsigned Index) const;
>>
>> + dice_iterator begin_dices() const;
>> + dice_iterator end_dices() const;
>> +
>> // In a MachO file, sections have a segment name. This is used in the .o
>> // files. They have a single segment, but this field specifies which segment
>> // a section should be put in in the final object.
>> @@ -152,6 +180,7 @@ public:
>> getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
>>
>> macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
>> + macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
>> macho::Header getHeader() const;
>> macho::Header64Ext getHeader64Ext() const;
>> macho::IndirectSymbolTableEntry
>> @@ -161,6 +190,7 @@ public:
>> unsigned Index) const;
>> macho::SymtabLoadCommand getSymtabLoadCommand() const;
>> macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
>> + macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
>>
>> StringRef getStringTableData() const;
>> bool is64Bit() const;
>> @@ -175,8 +205,66 @@ private:
>> SectionList Sections;
>> const char *SymtabLoadCmd;
>> const char *DysymtabLoadCmd;
>> + const char *DataInCodeLoadCmd;
>> };
>>
>> +/// DiceRef
>> +inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
>> + : DicePimpl(DiceP) , OwningObject(Owner) {}
>> +
>> +inline bool DiceRef::operator==(const DiceRef &Other) const {
>> + return DicePimpl == Other.DicePimpl;
>> +}
>> +
>> +inline bool DiceRef::operator<(const DiceRef &Other) const {
>> + return DicePimpl < Other.DicePimpl;
>> +}
>> +
>> +inline error_code DiceRef::getNext(DiceRef &Result) const {
>> + DataRefImpl Rel = DicePimpl;
>> + const macho::DataInCodeTableEntry *P =
>> + reinterpret_cast<const macho::DataInCodeTableEntry *>(Rel.p);
>> + Rel.p = reinterpret_cast<uintptr_t>(P + 1);
>> + Result = DiceRef(Rel, OwningObject);
>> + return object_error::success;
>> +}
>> +
>> +// Since a Mach-O data in code reference, a DiceRef, can only be created when
>> +// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
>> +// the methods that get the values of the fields of the reference.
>> +
>> +inline error_code DiceRef::getOffset(uint32_t &Result) const {
>> + const MachOObjectFile *MachOOF =
>> + static_cast<const MachOObjectFile *>(OwningObject);
>> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> + Result = Dice.Offset;
>> + return object_error::success;
>> +}
>> +
>> +inline error_code DiceRef::getLength(uint16_t &Result) const {
>> + const MachOObjectFile *MachOOF =
>> + static_cast<const MachOObjectFile *>(OwningObject);
>> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> + Result = Dice.Length;
>> + return object_error::success;
>> +}
>> +
>> +inline error_code DiceRef::getKind(uint16_t &Result) const {
>> + const MachOObjectFile *MachOOF =
>> + static_cast<const MachOObjectFile *>(OwningObject);
>> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
>> + Result = Dice.Kind;
>> + return object_error::success;
>> +}
>> +
>> +inline DataRefImpl DiceRef::getRawDataRefImpl() const {
>> + return DicePimpl;
>> +}
>> +
>> +inline const ObjectFile *DiceRef::getObjectFile() const {
>> + return OwningObject;
>> +}
>> +
>> }
>> }
>>
>>
>> Modified: llvm/trunk/lib/Object/MachOObjectFile.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/MachOObjectFile.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Object/MachOObjectFile.cpp (original)
>> +++ llvm/trunk/lib/Object/MachOObjectFile.cpp Thu Jun 6 12:20:50 2013
>> @@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryB
>> bool IsLittleEndian, bool Is64bits,
>> error_code &ec)
>> : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
>> - SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
>> + SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
>> uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
>> macho::LoadCommandType SegmentLoadType = is64Bit() ?
>> macho::LCT_Segment64 : macho::LCT_Segment;
>> @@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryB
>> } else if (Load.C.Type == macho::LCT_Dysymtab) {
>> assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
>> DysymtabLoadCmd = Load.Ptr;
>> + } else if (Load.C.Type == macho::LCT_DataInCode) {
>> + assert(!DataInCodeLoadCmd && "Multiple data in code tables");
>> + DataInCodeLoadCmd = Load.Ptr;
>> } else if (Load.C.Type == SegmentLoadType) {
>> uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
>> for (unsigned J = 0; J < NumSections; ++J) {
>> @@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::get
>> return getSectionRelEnd(DRI);
>> }
>>
>> +dice_iterator MachOObjectFile::begin_dices() const {
>> + DataRefImpl DRI;
>> + if (!DataInCodeLoadCmd)
>> + return dice_iterator(DiceRef(DRI, this));
>> +
>> + macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
>> + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
>> + return dice_iterator(DiceRef(DRI, this));
>> +}
>> +
>> +dice_iterator MachOObjectFile::end_dices() const {
>> + DataRefImpl DRI;
>> + if (!DataInCodeLoadCmd)
>> + return dice_iterator(DiceRef(DRI, this));
>> +
>> + macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
>> + unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
>> + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
>> + return dice_iterator(DiceRef(DRI, this));
>> +}
>> +
>> StringRef
>> MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
>> ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
>> @@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefIm
>> return getStruct<macho::RelocationEntry>(this, P);
>> }
>>
>> +macho::DataInCodeTableEntry
>> +MachOObjectFile::getDice(DataRefImpl Rel) const {
>> + const char *P = reinterpret_cast<const char *>(Rel.p);
>> + return getStruct<macho::DataInCodeTableEntry>(this, P);
>> +}
>> +
>> macho::Header MachOObjectFile::getHeader() const {
>> return getStruct<macho::Header>(this, getPtr(this, 0));
>> }
>> @@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFi
>> return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
>> }
>>
>> +macho::LinkeditDataLoadCommand
>> +MachOObjectFile::getDataInCodeLoadCommand() const {
>> + if (DataInCodeLoadCmd)
>> + return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
>> +
>> + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
>> + macho::LinkeditDataLoadCommand Cmd;
>> + Cmd.Type = macho::LCT_DataInCode;
>> + Cmd.Size = macho::LinkeditLoadCommandSize;
>> + Cmd.DataOffset = 0;
>> + Cmd.DataSize = 0;
>> + return Cmd;
>> +}
>> +
>> StringRef MachOObjectFile::getStringTableData() const {
>> macho::SymtabLoadCommand S = getSymtabLoadCommand();
>> return getData().substr(S.StringTableOffset, S.StringTableSize);
>>
>> Added: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7?rev=183424&view=auto
>> ==============================================================================
>> Binary file - no diff available.
>>
>> Propchange: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
>> ------------------------------------------------------------------------------
>> svn:mime-type = application/octet-stream
>>
>> Added: llvm/trunk/test/Object/X86/macho-data-in-code.test
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/macho-data-in-code.test?rev=183424&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Object/X86/macho-data-in-code.test (added)
>> +++ llvm/trunk/test/Object/X86/macho-data-in-code.test Thu Jun 6 12:20:50 2013
>> @@ -0,0 +1,7 @@
>> +RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
>> +
>> +CHECK: 12: 80 bd pop {r7, pc}
>> +
>> +CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
>> +CHECK: 16: 00 00 movs r0, r0
>> +
>>
>> Modified: llvm/trunk/tools/llvm-objdump/MachODump.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MachODump.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/tools/llvm-objdump/MachODump.cpp (original)
>> +++ llvm/trunk/tools/llvm-objdump/MachODump.cpp Thu Jun 6 12:20:50 2013
>> @@ -87,12 +87,73 @@ struct SymbolSorter {
>> }
>> };
>>
>> +// Types for the sorted data in code table that is built before disassembly
>> +// and the predicate function to sort them.
>> +typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
>> +typedef std::vector<DiceTableEntry> DiceTable;
>> +typedef DiceTable::iterator dice_table_iterator;
>> +
>> +static bool
>> +compareDiceTableEntries(const DiceTableEntry i,
>> + const DiceTableEntry j) {
>> + return i.first == j.first;
>> +}
>> +
>> +static void DumpDataInCode(const char *bytes, uint64_t Size,
>> + unsigned short Kind) {
>> + uint64_t Value;
>> +
>> + switch (Kind) {
>> + case macho::Data:
>> + switch (Size) {
>> + case 4:
>> + Value = bytes[3] << 24 |
>> + bytes[2] << 16 |
>> + bytes[1] << 8 |
>> + bytes[0];
>> + outs() << "\t.long " << Value;
>> + break;
>> + case 2:
>> + Value = bytes[1] << 8 |
>> + bytes[0];
>> + outs() << "\t.short " << Value;
>> + break;
>> + case 1:
>> + Value = bytes[0];
>> + outs() << "\t.byte " << Value;
>> + break;
>> + }
>> + outs() << "\t@ KIND_DATA\n";
>> + break;
>> + case macho::JumpTable8:
>> + Value = bytes[0];
>> + outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
>> + break;
>> + case macho::JumpTable16:
>> + Value = bytes[1] << 8 |
>> + bytes[0];
>> + outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
>> + break;
>> + case macho::JumpTable32:
>> + Value = bytes[3] << 24 |
>> + bytes[2] << 16 |
>> + bytes[1] << 8 |
>> + bytes[0];
>> + outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
>> + break;
>> + default:
>> + outs() << "\t@ data in code kind = " << Kind << "\n";
>> + break;
>> + }
>> +}
>> +
>> static void
>> getSectionsAndSymbols(const macho::Header Header,
>> MachOObjectFile *MachOObj,
>> std::vector<SectionRef> &Sections,
>> std::vector<SymbolRef> &Symbols,
>> - SmallVectorImpl<uint64_t> &FoundFns) {
>> + SmallVectorImpl<uint64_t> &FoundFns,
>> + uint64_t &BaseSegmentAddress) {
>> error_code ec;
>> for (symbol_iterator SI = MachOObj->begin_symbols(),
>> SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
>> @@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Heade
>>
>> MachOObjectFile::LoadCommandInfo Command =
>> MachOObj->getFirstLoadCommandInfo();
>> + bool BaseSegmentAddressSet = false;
>> for (unsigned i = 0; ; ++i) {
>> if (Command.C.Type == macho::LCT_FunctionStarts) {
>> // We found a function starts segment, parse the addresses for later
>> @@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Heade
>>
>> MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
>> }
>> + else if (Command.C.Type == macho::LCT_Segment) {
>> + macho::SegmentLoadCommand SLC =
>> + MachOObj->getSegmentLoadCommand(Command);
>> + StringRef SegName = SLC.Name;
>> + if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
>> + BaseSegmentAddressSet = true;
>> + BaseSegmentAddress = SLC.VMAddress;
>> + }
>> + }
>>
>> if (i == Header.NumLoadCommands - 1)
>> break;
>> @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(Strin
>> std::vector<SectionRef> Sections;
>> std::vector<SymbolRef> Symbols;
>> SmallVector<uint64_t, 8> FoundFns;
>> + uint64_t BaseSegmentAddress;
>>
>> - getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
>> + getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
>> + BaseSegmentAddress);
>>
>> // Make a copy of the unsorted symbol list. FIXME: duplication
>> std::vector<SymbolRef> UnsortedSymbols(Symbols);
>> // Sort the symbols by address, just in case they didn't come in that way.
>> std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
>>
>> + // Build a data in code table that is sorted on by the address of each entry.
>> + uint64_t BaseAddress = 0;
>> + if (Header.FileType == macho::HFT_Object)
>> + Sections[0].getAddress(BaseAddress);
>> + else
>> + BaseAddress = BaseSegmentAddress;
>> + DiceTable Dices;
>> + error_code ec;
>> + for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
>> + DI != DE; DI.increment(ec)){
>> + uint32_t Offset;
>> + DI->getOffset(Offset);
>> + Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
>> + }
>> + array_pod_sort(Dices.begin(), Dices.end());
>> +
>> #ifndef NDEBUG
>> raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
>> #else
>> @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(Strin
>> for (uint64_t Index = Start; Index < End; Index += Size) {
>> MCInst Inst;
>>
>> + uint64_t SectAddress = 0;
>> + Sections[SectIdx].getAddress(SectAddress);
>> + outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
>> +
>> + // Check the data in code table here to see if this is data not an
>> + // instruction to be disassembled.
>> + DiceTable Dice;
>> + Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
>> + dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
>> + Dice.begin(), Dice.end(),
>> + compareDiceTableEntries);
>> + if (DTI != Dices.end()){
>> + uint16_t Length;
>> + DTI->second.getLength(Length);
>> + DumpBytes(StringRef(Bytes.data() + Index, Length));
>> + uint16_t Kind;
>> + DTI->second.getKind(Kind);
>> + DumpDataInCode(Bytes.data() + Index, Length, Kind);
>> + continue;
>> + }
>> +
>> if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
>> DebugOut, nulls())) {
>> - uint64_t SectAddress = 0;
>> - Sections[SectIdx].getAddress(SectAddress);
>> - outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
>> -
>> DumpBytes(StringRef(Bytes.data() + Index, Size));
>> IP->printInst(&Inst, outs(), "");
>>
>>
>> Modified: llvm/trunk/tools/macho-dump/macho-dump.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/macho-dump/macho-dump.cpp?rev=183424&r1=183423&r2=183424&view=diff
>> ==============================================================================
>> --- llvm/trunk/tools/macho-dump/macho-dump.cpp (original)
>> +++ llvm/trunk/tools/macho-dump/macho-dump.cpp Thu Jun 6 12:20:50 2013
>> @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObj
>> << " ('datasize', " << LLC.DataSize << ")\n"
>> << " ('_data_regions', [\n";
>>
>> - unsigned NumRegions = LLC.DataSize / 8;
>> + unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
>> for (unsigned i = 0; i < NumRegions; ++i) {
>> macho::DataInCodeTableEntry DICE =
>> Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130606/fbc3fe03/attachment.html>
More information about the llvm-commits
mailing list