[llvm] r357869 - [llvm-objdump] Split disassembleObject and simplify --{start,stop}-address handling

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 7 09:33:25 PDT 2019


Author: maskray
Date: Sun Apr  7 09:33:24 2019
New Revision: 357869

URL: http://llvm.org/viewvc/llvm-project?rev=357869&view=rev
Log:
[llvm-objdump] Split disassembleObject and simplify --{start,stop}-address handling

The main disassembly loop is hard to read due to special handling of ARM
ELF data & ELF data. Split off the logic into two functions
dumpARMELFData and dumpELFData. Hoist some checks outside of the loop.

The --start-address and --stop-address handling has redundant checks and
minor off-by-one issues (the symbol-skip comparisons used < and > where <=
and >= are needed). Fix them.

Modified:
    llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp

Modified: llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp?rev=357869&r1=357868&r2=357869&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp (original)
+++ llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp Sun Apr  7 09:33:24 2019
@@ -910,6 +910,76 @@ static bool shouldAdjustVA(const Section
   return false;
 }
 
+static uint64_t
+dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+               const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
+               const std::vector<uint64_t> &TextMappingSymsAddr) {
+  support::endianness Endian =
+      Obj->isLittleEndian() ? support::little : support::big;
+  while (Index < End) {
+    outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+    outs() << "\t";
+    if (Index + 4 <= End) {
+      dumpBytes(Bytes.slice(Index, 4), outs());
+      outs() << "\t.word\t"
+             << format_hex(
+                    support::endian::read32(Bytes.data() + Index, Endian), 10);
+      Index += 4;
+    } else if (Index + 2 <= End) {
+      dumpBytes(Bytes.slice(Index, 2), outs());
+      outs() << "\t\t.short\t"
+             << format_hex(
+                    support::endian::read16(Bytes.data() + Index, Endian), 6);
+      Index += 2;
+    } else {
+      dumpBytes(Bytes.slice(Index, 1), outs());
+      outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
+      ++Index;
+    }
+    outs() << "\n";
+    if (std::binary_search(TextMappingSymsAddr.begin(),
+                           TextMappingSymsAddr.end(), Index))
+      break;
+  }
+  return Index;
+}
+
+static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+                        ArrayRef<uint8_t> Bytes) {
+  // print out data up to 8 bytes at a time in hex and ascii
+  uint8_t AsciiData[9] = {'\0'};
+  uint8_t Byte;
+  int NumBytes = 0;
+
+  for (; Index < End; ++Index) {
+    if (NumBytes == 0) {
+      outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+      outs() << "\t";
+    }
+    Byte = Bytes.slice(Index)[0];
+    outs() << format(" %02x", Byte);
+    AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
+
+    uint8_t IndentOffset = 0;
+    NumBytes++;
+    if (Index == End - 1 || NumBytes > 8) {
+      // Indent the space for less than 8 bytes data.
+      // 2 spaces for byte and one for space between bytes
+      IndentOffset = 3 * (8 - NumBytes);
+      for (int Excess = NumBytes; Excess < 8; Excess++)
+        AsciiData[Excess] = '\0';
+      NumBytes = 8;
+    }
+    if (NumBytes == 8) {
+      AsciiData[8] = '\0';
+      outs() << std::string(IndentOffset, ' ') << "         ";
+      outs() << reinterpret_cast<char *>(AsciiData);
+      outs() << '\n';
+      NumBytes = 0;
+    }
+  }
+}
+
 static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
                               MCContext &Ctx, MCDisassembler *DisAsm,
                               const MCInstrAnalysis *MIA, MCInstPrinter *IP,
@@ -1081,10 +1151,13 @@ static void disassembleObject(const Targ
 
       // Check if we need to skip symbol
       // Skip if the symbol's data is not between StartAddress and StopAddress
-      if (End + SectionAddr < StartAddress ||
-          Start + SectionAddr > StopAddress) {
+      if (End + SectionAddr <= StartAddress ||
+          Start + SectionAddr >= StopAddress)
         continue;
-      }
+
+      // Stop disassembly at the stop address specified
+      if (End + SectionAddr > StopAddress)
+        End = StopAddress - SectionAddr;
 
       /// Skip if user requested specific symbols and this is not in the list
       if (!DisasmFuncsSet.empty() &&
@@ -1099,10 +1172,6 @@ static void disassembleObject(const Targ
         outs() << SectionName << ':';
       }
 
-      // Stop disassembly at the stop address specified
-      if (End + SectionAddr > StopAddress)
-        End = StopAddress - SectionAddr;
-
       if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
         if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
           // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
@@ -1150,102 +1219,38 @@ static void disassembleObject(const Targ
                             SectionAddr + Start, DebugOut, CommentStream);
       Start += Size;
 
-      for (Index = Start; Index < End; Index += Size) {
-        MCInst Inst;
+      Index = Start;
+      if (SectionAddr < StartAddress)
+        Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
+
+      // If there is a data symbol inside an ELF text section and we are
+      // only disassembling text (applicable all architectures), we are in a
+      // situation where we must print the data and not disassemble it.
+      if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
+          !DisassembleAll && Section.isText()) {
+        dumpELFData(SectionAddr, Index, End, Bytes);
+        Index = End;
+      }
 
-        if (Index + SectionAddr < StartAddress ||
-            Index + SectionAddr > StopAddress) {
-          // skip byte by byte till StartAddress is reached
-          Size = 1;
-          continue;
-        }
-        // AArch64 ELF binaries can interleave data and text in the
-        // same section. We rely on the markers introduced to
-        // understand what we need to dump. If the data marker is within a
-        // function, it is denoted as a word/short etc
-        if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
-            !DisassembleAll &&
+      bool CheckARMELFData = isArmElf(Obj) &&
+                             std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
+                             !DisassembleAll;
+      MCInst Inst;
+      while (Index < End) {
+        // AArch64 ELF binaries can interleave data and text in the same
+        // section. We rely on the markers introduced to understand what we
+        // need to dump. If the data marker is within a function, it is
+        // denoted as a word/short etc.
+        if (CheckARMELFData &&
             std::binary_search(DataMappingSymsAddr.begin(),
                                DataMappingSymsAddr.end(), Index)) {
-          // Switch to data.
-          support::endianness Endian =
-              Obj->isLittleEndian() ? support::little : support::big;
-          while (Index < End) {
-            outs() << format("%8" PRIx64 ":", SectionAddr + Index);
-            outs() << "\t";
-            if (Index + 4 <= End) {
-              dumpBytes(Bytes.slice(Index, 4), outs());
-              outs() << "\t.word\t"
-                     << format_hex(support::endian::read32(Bytes.data() + Index,
-                                                           Endian),
-                                   10);
-              Index += 4;
-            } else if (Index + 2 <= End) {
-              dumpBytes(Bytes.slice(Index, 2), outs());
-              outs() << "\t\t.short\t"
-                     << format_hex(support::endian::read16(Bytes.data() + Index,
-                                                           Endian),
-                                   6);
-              Index += 2;
-            } else {
-              dumpBytes(Bytes.slice(Index, 1), outs());
-              outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
-              ++Index;
-            }
-            outs() << "\n";
-            if (std::binary_search(TextMappingSymsAddr.begin(),
-                                   TextMappingSymsAddr.end(), Index))
-              break;
-          }
-        }
-
-        // If there is a data symbol inside an ELF text section and we are only
-        // disassembling text (applicable all architectures),
-        // we are in a situation where we must print the data and not
-        // disassemble it.
-        if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
-            !DisassembleAll && Section.isText()) {
-          // print out data up to 8 bytes at a time in hex and ascii
-          uint8_t AsciiData[9] = {'\0'};
-          uint8_t Byte;
-          int NumBytes = 0;
-
-          for (Index = Start; Index < End; Index += 1) {
-            if (((SectionAddr + Index) < StartAddress) ||
-                ((SectionAddr + Index) > StopAddress))
-              continue;
-            if (NumBytes == 0) {
-              outs() << format("%8" PRIx64 ":", SectionAddr + Index);
-              outs() << "\t";
-            }
-            Byte = Bytes.slice(Index)[0];
-            outs() << format(" %02x", Byte);
-            AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
-
-            uint8_t IndentOffset = 0;
-            NumBytes++;
-            if (Index == End - 1 || NumBytes > 8) {
-              // Indent the space for less than 8 bytes data.
-              // 2 spaces for byte and one for space between bytes
-              IndentOffset = 3 * (8 - NumBytes);
-              for (int Excess = NumBytes; Excess < 8; Excess++)
-                AsciiData[Excess] = '\0';
-              NumBytes = 8;
-            }
-            if (NumBytes == 8) {
-              AsciiData[8] = '\0';
-              outs() << std::string(IndentOffset, ' ') << "         ";
-              outs() << reinterpret_cast<char *>(AsciiData);
-              outs() << '\n';
-              NumBytes = 0;
-            }
-          }
+          Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
+                                 TextMappingSymsAddr);
+          continue;
         }
-        if (Index >= End)
-          break;
 
-        // When -z or --disassemble-zeroes are given we always dissasemble them.
-        // Otherwise we might want to skip zero bytes we see.
+        // When -z or --disassemble-zeroes are given we always disassemble
+        // them. Otherwise we might want to skip zero bytes we see.
         if (!DisassembleZeroes) {
           uint64_t MaxOffset = End - Index;
           // For -reloc: print zero blocks patched by relocations, so that
@@ -1257,23 +1262,23 @@ static void disassembleObject(const Targ
                   countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
             outs() << "\t\t..." << '\n';
             Index += N;
-            if (Index >= End)
-              break;
+            continue;
           }
         }
 
         // Disassemble a real instruction or a data when disassemble all is
         // provided
-        bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
-                                                   SectionAddr + Index, DebugOut,
-                                                   CommentStream);
+        Inst.clear();
+        bool Disassembled = DisAsm->getInstruction(
+            Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
+            CommentStream);
         if (Size == 0)
           Size = 1;
 
-        PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
-                      Bytes.slice(Index, Size),
-                      {SectionAddr + Index + VMAAdjustment, Section.getIndex()},
-                      outs(), "", *STI, &SP, &Rels);
+        PIP.printInst(
+            *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
+            {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
+            "", *STI, &SP, &Rels);
         outs() << CommentStream.str();
         Comments.clear();
 
@@ -1343,7 +1348,7 @@ static void disassembleObject(const Targ
           while (RelCur != RelEnd) {
             uint64_t Offset = RelCur->getOffset();
             // If this relocation is hidden, skip it.
-            if (getHidden(*RelCur) || ((SectionAddr + Offset) < StartAddress)) {
+            if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
               ++RelCur;
               continue;
             }
@@ -1357,7 +1362,7 @@ static void disassembleObject(const Targ
               Expected<section_iterator> SymSI =
                   RelCur->getSymbol()->getSection();
               if (SymSI && *SymSI != Obj->section_end() &&
-                  (shouldAdjustVA(**SymSI)))
+                  shouldAdjustVA(**SymSI))
                 Offset += AdjustVMA;
             }
 
@@ -1366,6 +1371,8 @@ static void disassembleObject(const Targ
             ++RelCur;
           }
         }
+
+        Index += Size;
       }
     }
   }




More information about the llvm-commits mailing list