[llvm] r183424 - Teach llvm-objdump with the -macho parser how to use the data in code table
Jim Grosbach
grosbach at apple.com
Thu Jun 6 13:06:50 PDT 2013
Excellent!
The test file should be in the test/Object/ARM subdirectory, though, not X86.
-Jim
On Jun 6, 2013, at 10:20 AM, Kevin Enderby <enderby at apple.com> wrote:
> Author: enderby
> Date: Thu Jun 6 12:20:50 2013
> New Revision: 183424
>
> URL: http://llvm.org/viewvc/llvm-project?rev=183424&view=rev
> Log:
> Teach llvm-objdump with the -macho parser how to use the data in code table
> from the LC_DATA_IN_CODE load command. And when disassembling print
> the data in code formatted for the kind of data it is and not disassemble those
> bytes.
>
> I added the format specific functionality to the derived class MachOObjectFile
> since these tables only appear in Mach-O object files. This is my first
> attempt to modify the libObject stuff so if folks have better suggestions
> how to fit this in or suggestions on the implementation please let me know.
>
> rdar://11791371
>
> Added:
> llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7 (with props)
> llvm/trunk/test/Object/X86/macho-data-in-code.test
> Modified:
> llvm/trunk/include/llvm/Object/MachO.h
> llvm/trunk/lib/Object/MachOObjectFile.cpp
> llvm/trunk/tools/llvm-objdump/MachODump.cpp
> llvm/trunk/tools/macho-dump/macho-dump.cpp
>
> Modified: llvm/trunk/include/llvm/Object/MachO.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/MachO.h?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Object/MachO.h (original)
> +++ llvm/trunk/include/llvm/Object/MachO.h Thu Jun 6 12:20:50 2013
> @@ -25,6 +25,31 @@
> namespace llvm {
> namespace object {
>
> +/// DiceRef - This is a value type class that represents a single
> +/// data in code entry in the table in a Mach-O object file.
> +class DiceRef {
> + DataRefImpl DicePimpl;
> + const ObjectFile *OwningObject;
> +
> +public:
> + DiceRef() : OwningObject(NULL) { }
> +
> + DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
> +
> + bool operator==(const DiceRef &Other) const;
> + bool operator<(const DiceRef &Other) const;
> +
> + error_code getNext(DiceRef &Result) const;
> +
> + error_code getOffset(uint32_t &Result) const;
> + error_code getLength(uint16_t &Result) const;
> + error_code getKind(uint16_t &Result) const;
> +
> + DataRefImpl getRawDataRefImpl() const;
> + const ObjectFile *getObjectFile() const;
> +};
> +typedef content_iterator<DiceRef> dice_iterator;
> +
> class MachOObjectFile : public ObjectFile {
> public:
> struct LoadCommandInfo {
> @@ -108,6 +133,9 @@ public:
> relocation_iterator getSectionRelBegin(unsigned Index) const;
> relocation_iterator getSectionRelEnd(unsigned Index) const;
>
> + dice_iterator begin_dices() const;
> + dice_iterator end_dices() const;
> +
> // In a MachO file, sections have a segment name. This is used in the .o
> // files. They have a single segment, but this field specifies which segment
> // a section should be put in in the final object.
> @@ -152,6 +180,7 @@ public:
> getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
>
> macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
> + macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
> macho::Header getHeader() const;
> macho::Header64Ext getHeader64Ext() const;
> macho::IndirectSymbolTableEntry
> @@ -161,6 +190,7 @@ public:
> unsigned Index) const;
> macho::SymtabLoadCommand getSymtabLoadCommand() const;
> macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
> + macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
>
> StringRef getStringTableData() const;
> bool is64Bit() const;
> @@ -175,8 +205,66 @@ private:
> SectionList Sections;
> const char *SymtabLoadCmd;
> const char *DysymtabLoadCmd;
> + const char *DataInCodeLoadCmd;
> };
>
> +/// DiceRef
> +inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
> + : DicePimpl(DiceP) , OwningObject(Owner) {}
> +
> +inline bool DiceRef::operator==(const DiceRef &Other) const {
> + return DicePimpl == Other.DicePimpl;
> +}
> +
> +inline bool DiceRef::operator<(const DiceRef &Other) const {
> + return DicePimpl < Other.DicePimpl;
> +}
> +
> +inline error_code DiceRef::getNext(DiceRef &Result) const {
> + DataRefImpl Rel = DicePimpl;
> + const macho::DataInCodeTableEntry *P =
> + reinterpret_cast<const macho::DataInCodeTableEntry *>(Rel.p);
> + Rel.p = reinterpret_cast<uintptr_t>(P + 1);
> + Result = DiceRef(Rel, OwningObject);
> + return object_error::success;
> +}
> +
> +// Since a Mach-O data in code reference, a DiceRef, can only be created when
> +// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
> +// the methods that get the values of the fields of the reference.
> +
> +inline error_code DiceRef::getOffset(uint32_t &Result) const {
> + const MachOObjectFile *MachOOF =
> + static_cast<const MachOObjectFile *>(OwningObject);
> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> + Result = Dice.Offset;
> + return object_error::success;
> +}
> +
> +inline error_code DiceRef::getLength(uint16_t &Result) const {
> + const MachOObjectFile *MachOOF =
> + static_cast<const MachOObjectFile *>(OwningObject);
> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> + Result = Dice.Length;
> + return object_error::success;
> +}
> +
> +inline error_code DiceRef::getKind(uint16_t &Result) const {
> + const MachOObjectFile *MachOOF =
> + static_cast<const MachOObjectFile *>(OwningObject);
> + macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
> + Result = Dice.Kind;
> + return object_error::success;
> +}
> +
> +inline DataRefImpl DiceRef::getRawDataRefImpl() const {
> + return DicePimpl;
> +}
> +
> +inline const ObjectFile *DiceRef::getObjectFile() const {
> + return OwningObject;
> +}
> +
> }
> }
>
>
> Modified: llvm/trunk/lib/Object/MachOObjectFile.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/MachOObjectFile.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Object/MachOObjectFile.cpp (original)
> +++ llvm/trunk/lib/Object/MachOObjectFile.cpp Thu Jun 6 12:20:50 2013
> @@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryB
> bool IsLittleEndian, bool Is64bits,
> error_code &ec)
> : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
> - SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
> + SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
> uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
> macho::LoadCommandType SegmentLoadType = is64Bit() ?
> macho::LCT_Segment64 : macho::LCT_Segment;
> @@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryB
> } else if (Load.C.Type == macho::LCT_Dysymtab) {
> assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
> DysymtabLoadCmd = Load.Ptr;
> + } else if (Load.C.Type == macho::LCT_DataInCode) {
> + assert(!DataInCodeLoadCmd && "Multiple data in code tables");
> + DataInCodeLoadCmd = Load.Ptr;
> } else if (Load.C.Type == SegmentLoadType) {
> uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
> for (unsigned J = 0; J < NumSections; ++J) {
> @@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::get
> return getSectionRelEnd(DRI);
> }
>
> +dice_iterator MachOObjectFile::begin_dices() const {
> + DataRefImpl DRI;
> + if (!DataInCodeLoadCmd)
> + return dice_iterator(DiceRef(DRI, this));
> +
> + macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
> + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
> + return dice_iterator(DiceRef(DRI, this));
> +}
> +
> +dice_iterator MachOObjectFile::end_dices() const {
> + DataRefImpl DRI;
> + if (!DataInCodeLoadCmd)
> + return dice_iterator(DiceRef(DRI, this));
> +
> + macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
> + unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
> + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
> + return dice_iterator(DiceRef(DRI, this));
> +}
> +
> StringRef
> MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
> ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
> @@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefIm
> return getStruct<macho::RelocationEntry>(this, P);
> }
>
> +macho::DataInCodeTableEntry
> +MachOObjectFile::getDice(DataRefImpl Rel) const {
> + const char *P = reinterpret_cast<const char *>(Rel.p);
> + return getStruct<macho::DataInCodeTableEntry>(this, P);
> +}
> +
> macho::Header MachOObjectFile::getHeader() const {
> return getStruct<macho::Header>(this, getPtr(this, 0));
> }
> @@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFi
> return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
> }
>
> +macho::LinkeditDataLoadCommand
> +MachOObjectFile::getDataInCodeLoadCommand() const {
> + if (DataInCodeLoadCmd)
> + return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
> +
> + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
> + macho::LinkeditDataLoadCommand Cmd;
> + Cmd.Type = macho::LCT_DataInCode;
> + Cmd.Size = macho::LinkeditLoadCommandSize;
> + Cmd.DataOffset = 0;
> + Cmd.DataSize = 0;
> + return Cmd;
> +}
> +
> StringRef MachOObjectFile::getStringTableData() const {
> macho::SymtabLoadCommand S = getSymtabLoadCommand();
> return getData().substr(S.StringTableOffset, S.StringTableSize);
>
> Added: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7?rev=183424&view=auto
> ==============================================================================
> Binary file - no diff available.
>
> Propchange: llvm/trunk/test/Object/Inputs/macho-data-in-code.macho-thumbv7
> ------------------------------------------------------------------------------
> svn:mime-type = application/octet-stream
>
> Added: llvm/trunk/test/Object/X86/macho-data-in-code.test
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/macho-data-in-code.test?rev=183424&view=auto
> ==============================================================================
> --- llvm/trunk/test/Object/X86/macho-data-in-code.test (added)
> +++ llvm/trunk/test/Object/X86/macho-data-in-code.test Thu Jun 6 12:20:50 2013
> @@ -0,0 +1,7 @@
> +RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
> +
> +CHECK: 12: 80 bd pop {r7, pc}
> +
> +CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
> +CHECK: 16: 00 00 movs r0, r0
> +
>
> Modified: llvm/trunk/tools/llvm-objdump/MachODump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/MachODump.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-objdump/MachODump.cpp (original)
> +++ llvm/trunk/tools/llvm-objdump/MachODump.cpp Thu Jun 6 12:20:50 2013
> @@ -87,12 +87,73 @@ struct SymbolSorter {
> }
> };
>
> +// Types for the sorted data in code table that is built before disassembly
> +// and the predicate function to sort them.
> +typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
> +typedef std::vector<DiceTableEntry> DiceTable;
> +typedef DiceTable::iterator dice_table_iterator;
> +
> +static bool
> +compareDiceTableEntries(const DiceTableEntry i,
> + const DiceTableEntry j) {
> + return i.first == j.first;
> +}
> +
> +static void DumpDataInCode(const char *bytes, uint64_t Size,
> + unsigned short Kind) {
> + uint64_t Value;
> +
> + switch (Kind) {
> + case macho::Data:
> + switch (Size) {
> + case 4:
> + Value = bytes[3] << 24 |
> + bytes[2] << 16 |
> + bytes[1] << 8 |
> + bytes[0];
> + outs() << "\t.long " << Value;
> + break;
> + case 2:
> + Value = bytes[1] << 8 |
> + bytes[0];
> + outs() << "\t.short " << Value;
> + break;
> + case 1:
> + Value = bytes[0];
> + outs() << "\t.byte " << Value;
> + break;
> + }
> + outs() << "\t@ KIND_DATA\n";
> + break;
> + case macho::JumpTable8:
> + Value = bytes[0];
> + outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
> + break;
> + case macho::JumpTable16:
> + Value = bytes[1] << 8 |
> + bytes[0];
> + outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
> + break;
> + case macho::JumpTable32:
> + Value = bytes[3] << 24 |
> + bytes[2] << 16 |
> + bytes[1] << 8 |
> + bytes[0];
> + outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
> + break;
> + default:
> + outs() << "\t@ data in code kind = " << Kind << "\n";
> + break;
> + }
> +}
> +
> static void
> getSectionsAndSymbols(const macho::Header Header,
> MachOObjectFile *MachOObj,
> std::vector<SectionRef> &Sections,
> std::vector<SymbolRef> &Symbols,
> - SmallVectorImpl<uint64_t> &FoundFns) {
> + SmallVectorImpl<uint64_t> &FoundFns,
> + uint64_t &BaseSegmentAddress) {
> error_code ec;
> for (symbol_iterator SI = MachOObj->begin_symbols(),
> SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
> @@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Heade
>
> MachOObjectFile::LoadCommandInfo Command =
> MachOObj->getFirstLoadCommandInfo();
> + bool BaseSegmentAddressSet = false;
> for (unsigned i = 0; ; ++i) {
> if (Command.C.Type == macho::LCT_FunctionStarts) {
> // We found a function starts segment, parse the addresses for later
> @@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Heade
>
> MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
> }
> + else if (Command.C.Type == macho::LCT_Segment) {
> + macho::SegmentLoadCommand SLC =
> + MachOObj->getSegmentLoadCommand(Command);
> + StringRef SegName = SLC.Name;
> + if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
> + BaseSegmentAddressSet = true;
> + BaseSegmentAddress = SLC.VMAddress;
> + }
> + }
>
> if (i == Header.NumLoadCommands - 1)
> break;
> @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(Strin
> std::vector<SectionRef> Sections;
> std::vector<SymbolRef> Symbols;
> SmallVector<uint64_t, 8> FoundFns;
> + uint64_t BaseSegmentAddress;
>
> - getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
> + getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
> + BaseSegmentAddress);
>
> // Make a copy of the unsorted symbol list. FIXME: duplication
> std::vector<SymbolRef> UnsortedSymbols(Symbols);
> // Sort the symbols by address, just in case they didn't come in that way.
> std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
>
> + // Build a data in code table that is sorted by the address of each entry.
> + uint64_t BaseAddress = 0;
> + if (Header.FileType == macho::HFT_Object)
> + Sections[0].getAddress(BaseAddress);
> + else
> + BaseAddress = BaseSegmentAddress;
> + DiceTable Dices;
> + error_code ec;
> + for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
> + DI != DE; DI.increment(ec)){
> + uint32_t Offset;
> + DI->getOffset(Offset);
> + Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
> + }
> + array_pod_sort(Dices.begin(), Dices.end());
> +
> #ifndef NDEBUG
> raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
> #else
> @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(Strin
> for (uint64_t Index = Start; Index < End; Index += Size) {
> MCInst Inst;
>
> + uint64_t SectAddress = 0;
> + Sections[SectIdx].getAddress(SectAddress);
> + outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
> +
> + // Check the data in code table here to see if this is data not an
> + // instruction to be disassembled.
> + DiceTable Dice;
> + Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
> + dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
> + Dice.begin(), Dice.end(),
> + compareDiceTableEntries);
> + if (DTI != Dices.end()){
> + uint16_t Length;
> + DTI->second.getLength(Length);
> + DumpBytes(StringRef(Bytes.data() + Index, Length));
> + uint16_t Kind;
> + DTI->second.getKind(Kind);
> + DumpDataInCode(Bytes.data() + Index, Length, Kind);
> + continue;
> + }
> +
> if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
> DebugOut, nulls())) {
> - uint64_t SectAddress = 0;
> - Sections[SectIdx].getAddress(SectAddress);
> - outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
> -
> DumpBytes(StringRef(Bytes.data() + Index, Size));
> IP->printInst(&Inst, outs(), "");
>
>
> Modified: llvm/trunk/tools/macho-dump/macho-dump.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/macho-dump/macho-dump.cpp?rev=183424&r1=183423&r2=183424&view=diff
> ==============================================================================
> --- llvm/trunk/tools/macho-dump/macho-dump.cpp (original)
> +++ llvm/trunk/tools/macho-dump/macho-dump.cpp Thu Jun 6 12:20:50 2013
> @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObj
> << " ('datasize', " << LLC.DataSize << ")\n"
> << " ('_data_regions', [\n";
>
> - unsigned NumRegions = LLC.DataSize / 8;
> + unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
> for (unsigned i = 0; i < NumRegions; ++i) {
> macho::DataInCodeTableEntry DICE =
> Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130606/92c3b09b/attachment.html>
More information about the llvm-commits
mailing list