[llvm] r363903 - [llvm-objdump] Switch between ARM/Thumb based on mapping symbols.

Eli Friedman via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 17:29:40 PDT 2019


Author: efriedma
Date: Wed Jun 19 17:29:40 2019
New Revision: 363903

URL: http://llvm.org/viewvc/llvm-project?rev=363903&view=rev
Log:
[llvm-objdump] Switch between ARM/Thumb based on mapping symbols.

The ARMDisassembler changes allow changing between ARM and Thumb mode
based on the MCSubtargetInfo, rather than the Target, which simplifies
the other changes a bit.

I'm not really happy with adding more target-specific logic to
tools/llvm-objdump/, but there isn't any easy way around it: the logic
in question specifically applies to disassembling an object file, and
that code simply isn't located in lib/Target, at least at the moment.

Differential Revision: https://reviews.llvm.org/D60927


Modified:
    llvm/trunk/include/llvm/Object/ELFObjectFile.h
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
    llvm/trunk/test/CodeGen/ARM/inlineasm-switch-mode.ll
    llvm/trunk/test/tools/llvm-objdump/ARM/v7r-subfeatures.s
    llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp

Modified: llvm/trunk/include/llvm/Object/ELFObjectFile.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/ELFObjectFile.h?rev=363903&r1=363902&r2=363903&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Object/ELFObjectFile.h (original)
+++ llvm/trunk/include/llvm/Object/ELFObjectFile.h Wed Jun 19 17:29:40 2019
@@ -54,7 +54,6 @@ class ELFObjectFileBase : public ObjectF
 protected:
   ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
 
-  virtual uint16_t getEMachine() const = 0;
   virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
@@ -91,6 +90,8 @@ public:
 
   virtual uint16_t getEType() const = 0;
 
+  virtual uint16_t getEMachine() const = 0;
+
   std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
 };
 

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=363903&r1=363902&r2=363903&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Wed Jun 19 17:29:40 2019
@@ -139,23 +139,18 @@ public:
                               ArrayRef<uint8_t> Bytes, uint64_t Address,
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
-};
-
-/// Thumb disassembler for all Thumb platforms.
-class ThumbDisassembler : public MCDisassembler {
-public:
-  ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
-    MCDisassembler(STI, Ctx) {
-  }
-
-  ~ThumbDisassembler() override = default;
-
-  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
-                              ArrayRef<uint8_t> Bytes, uint64_t Address,
-                              raw_ostream &VStream,
-                              raw_ostream &CStream) const override;
 
 private:
+  DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
+                                 ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                 raw_ostream &VStream,
+                                 raw_ostream &CStream) const;
+
+  DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
+                                   ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                   raw_ostream &VStream,
+                                   raw_ostream &CStream) const;
+
   mutable ITStatus ITBlock;
   mutable VPTStatus VPTBlock;
 
@@ -519,12 +514,6 @@ static MCDisassembler *createARMDisassem
   return new ARMDisassembler(STI, Ctx);
 }
 
-static MCDisassembler *createThumbDisassembler(const Target &T,
-                                               const MCSubtargetInfo &STI,
-                                               MCContext &Ctx) {
-  return new ThumbDisassembler(STI, Ctx);
-}
-
 // Post-decoding checks
 static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
                                             uint64_t Address, raw_ostream &OS,
@@ -562,6 +551,16 @@ DecodeStatus ARMDisassembler::getInstruc
                                              ArrayRef<uint8_t> Bytes,
                                              uint64_t Address, raw_ostream &OS,
                                              raw_ostream &CS) const {
+  if (STI.getFeatureBits()[ARM::ModeThumb])
+    return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
+  return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
+}
+
+DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
+                                                ArrayRef<uint8_t> Bytes,
+                                                uint64_t Address,
+                                                raw_ostream &OS,
+                                                raw_ostream &CS) const {
   CommentStream = &CS;
 
   assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -698,7 +697,7 @@ static bool isVectorPredicable(unsigned
 // to fix up the predicate operands using this context information as a
 // post-pass.
 MCDisassembler::DecodeStatus
-ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
   MCDisassembler::DecodeStatus S = Success;
 
   const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
@@ -813,7 +812,7 @@ ThumbDisassembler::AddThumbPredicate(MCI
 // mode, the auto-generated decoder will give them an (incorrect)
 // predicate operand.  We need to rewrite these operands based on the IT
 // context as a post-pass.
-void ThumbDisassembler::UpdateThumbVFPPredicate(
+void ARMDisassembler::UpdateThumbVFPPredicate(
   DecodeStatus &S, MCInst &MI) const {
   unsigned CC;
   CC = ITBlock.getITCC();
@@ -844,11 +843,11 @@ void ThumbDisassembler::UpdateThumbVFPPr
   }
 }
 
-DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                               ArrayRef<uint8_t> Bytes,
-                                               uint64_t Address,
-                                               raw_ostream &OS,
-                                               raw_ostream &CS) const {
+DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
+                                                  ArrayRef<uint8_t> Bytes,
+                                                  uint64_t Address,
+                                                  raw_ostream &OS,
+                                                  raw_ostream &CS) const {
   CommentStream = &CS;
 
   assert(STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -1046,9 +1045,9 @@ extern "C" void LLVMInitializeARMDisasse
   TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
                                          createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
-                                         createThumbDisassembler);
+                                         createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
-                                         createThumbDisassembler);
+                                         createARMDisassembler);
 }
 
 static const uint16_t GPRDecoderTable[] = {

Modified: llvm/trunk/test/CodeGen/ARM/inlineasm-switch-mode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/inlineasm-switch-mode.ll?rev=363903&r1=363902&r2=363903&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/inlineasm-switch-mode.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/inlineasm-switch-mode.ll Wed Jun 19 17:29:40 2019
@@ -1,7 +1,4 @@
-;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t
-; Two pass decoding needed because llvm-objdump does not respect mapping symbols
-;RUN: llvm-objdump -triple=armv7   -d %t | FileCheck %s --check-prefix=ARM
-;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB
+;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -d - | FileCheck %s
 
 define hidden i32 @bah(i8* %start) #0 align 2 {
   %1 = ptrtoint i8* %start to i32
@@ -10,13 +7,7 @@ define hidden i32 @bah(i8* %start) #0 al
   ret i32 %3
 }
 
-; ARM: $a
-; ARM-NEXT: 04 70 2d e5     str     r7, [sp, #-4]!
-; ARM: $t
-; ARM-NEXT: 48 1c
-
-; THUMB: $a{{.*}}:
-; THUMB-NEXT: 04 70
-; THUMB-NEXT: 2d e5
-; THUMB: $t{{.*}}:
-; THUMB-NEXT: 48 1c   adds    r0, r1, #1
+; CHECK: $a{{.*}}:
+; CHECK-NEXT: 04 70 2d e5     str     r7, [sp, #-4]!
+; CHECK: $t{{.*}}:
+; CHECK-NEXT: 48 1c   adds    r0, r1, #1

Modified: llvm/trunk/test/tools/llvm-objdump/ARM/v7r-subfeatures.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objdump/ARM/v7r-subfeatures.s?rev=363903&r1=363902&r2=363903&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-objdump/ARM/v7r-subfeatures.s (original)
+++ llvm/trunk/test/tools/llvm-objdump/ARM/v7r-subfeatures.s Wed Jun 19 17:29:40 2019
@@ -1,5 +1,6 @@
-@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=thumb -d - | FileCheck %s
-@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=arm -d - | FileCheck %s --check-prefix=CHECK-ARM
+@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -d - | FileCheck %s
+@ v7r implies Thumb hwdiv, but ARM hwdiv is optional
+@ FIXME: Does that imply we should actually refuse to disassemble it?
 
 .eabi_attribute Tag_CPU_arch, 10 // v7
 .eabi_attribute Tag_CPU_arch_profile, 0x52 // 'R' profile
@@ -9,8 +10,7 @@ div_arm:
   udiv r0, r1, r2
 
 @CHECK-LABEL: div_arm
-@CHECK-NOT: udiv r0, r1, r2
-@CHECK-ARM-NOT: udiv r0, r1, r2
+@CHECK: 11 f2 30 e7 <unknown>
 
 .thumb
 div_thumb:

Modified: llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp?rev=363903&r1=363902&r2=363903&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp (original)
+++ llvm/trunk/tools/llvm-objdump/llvm-objdump.cpp Wed Jun 19 17:29:40 2019
@@ -603,13 +603,18 @@ void SourcePrinter::printSourceLine(raw_
   OldLineInfo = LineInfo;
 }
 
+static bool isAArch64Elf(const ObjectFile *Obj) {
+  const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+  return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
+}
+
 static bool isArmElf(const ObjectFile *Obj) {
-  return (Obj->isELF() &&
-          (Obj->getArch() == Triple::aarch64 ||
-           Obj->getArch() == Triple::aarch64_be ||
-           Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb ||
-           Obj->getArch() == Triple::thumb ||
-           Obj->getArch() == Triple::thumbeb));
+  const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+  return Elf && Elf->getEMachine() == ELF::EM_ARM;
+}
+
+static bool hasMappingSymbols(const ObjectFile *Obj) {
+  return isArmElf(Obj) || isAArch64Elf(Obj);
 }
 
 static void printRelocation(const RelocationRef &Rel, uint64_t Address,
@@ -954,10 +959,24 @@ static bool shouldAdjustVA(const Section
   return false;
 }
 
+
+typedef std::pair<uint64_t, char> MappingSymbolPair;
+static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
+                                 uint64_t Address) {
+  auto Sym = bsearch(MappingSymbols, [Address](const MappingSymbolPair &Val) {
+      return Val.first > Address;
+  });
+  // Return zero for any address before the first mapping symbol; this means
+  // we should use the default disassembly mode, depending on the target.
+  if (Sym == MappingSymbols.begin())
+    return '\x00';
+  return (Sym - 1)->second;
+}
+
 static uint64_t
 dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
                const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
-               const std::vector<uint64_t> &TextMappingSymsAddr) {
+               ArrayRef<MappingSymbolPair> MappingSymbols) {
   support::endianness Endian =
       Obj->isLittleEndian() ? support::little : support::big;
   while (Index < End) {
@@ -981,8 +1000,7 @@ dumpARMELFData(uint64_t SectionAddr, uin
       ++Index;
     }
     outs() << "\n";
-    if (std::binary_search(TextMappingSymsAddr.begin(),
-                           TextMappingSymsAddr.end(), Index))
+    if (getMappingSymbolKind(MappingSymbols, Index) != 'd')
       break;
   }
   return Index;
@@ -1023,10 +1041,19 @@ static void dumpELFData(uint64_t Section
 }
 
 static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
-                              MCContext &Ctx, MCDisassembler *DisAsm,
+                              MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
+                              MCDisassembler *SecondaryDisAsm,
                               const MCInstrAnalysis *MIA, MCInstPrinter *IP,
-                              const MCSubtargetInfo *STI, PrettyPrinter &PIP,
+                              const MCSubtargetInfo *PrimarySTI,
+                              const MCSubtargetInfo *SecondarySTI,
+                              PrettyPrinter &PIP,
                               SourcePrinter &SP, bool InlineRelocs) {
+  const MCSubtargetInfo *STI = PrimarySTI;
+  MCDisassembler *DisAsm = PrimaryDisAsm;
+  bool PrimaryIsThumb = false;
+  if (isArmElf(Obj))
+    PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
+
   std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
   if (InlineRelocs)
     RelocMap = getRelocsMap(*Obj);
@@ -1113,25 +1140,23 @@ static void disassembleObject(const Targ
 
     // Get the list of all the symbols in this section.
     SectionSymbolsTy &Symbols = AllSymbols[Section];
-    std::vector<uint64_t> DataMappingSymsAddr;
-    std::vector<uint64_t> TextMappingSymsAddr;
-    if (isArmElf(Obj)) {
+    std::vector<MappingSymbolPair> MappingSymbols;
+    if (hasMappingSymbols(Obj)) {
       for (const auto &Symb : Symbols) {
         uint64_t Address = std::get<0>(Symb);
         StringRef Name = std::get<1>(Symb);
         if (Name.startswith("$d"))
-          DataMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'd');
         if (Name.startswith("$x"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'x');
         if (Name.startswith("$a"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 'a');
         if (Name.startswith("$t"))
-          TextMappingSymsAddr.push_back(Address - SectionAddr);
+          MappingSymbols.emplace_back(Address - SectionAddr, 't');
       }
     }
 
-    llvm::sort(DataMappingSymsAddr);
-    llvm::sort(TextMappingSymsAddr);
+    llvm::sort(MappingSymbols);
 
     if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
       // AMDGPU disassembler uses symbolizer for printing labels
@@ -1269,19 +1294,18 @@ static void disassembleObject(const Targ
         Index = End;
       }
 
-      bool CheckARMELFData = isArmElf(Obj) &&
+      bool CheckARMELFData = hasMappingSymbols(Obj) &&
                              std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
                              !DisassembleAll;
       while (Index < End) {
-        // AArch64 ELF binaries can interleave data and text in the same
-        // section. We rely on the markers introduced to understand what we
-        // need to dump. If the data marker is within a function, it is
+        // ARM and AArch64 ELF binaries can interleave data and text in the
+        // same section. We rely on the markers introduced to understand what
+        // we need to dump. If the data marker is within a function, it is
         // denoted as a word/short etc.
         if (CheckARMELFData &&
-            std::binary_search(DataMappingSymsAddr.begin(),
-                               DataMappingSymsAddr.end(), Index)) {
+            getMappingSymbolKind(MappingSymbols, Index) == 'd') {
           Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
-                                 TextMappingSymsAddr);
+                                 MappingSymbols);
           continue;
         }
 
@@ -1302,6 +1326,16 @@ static void disassembleObject(const Targ
           }
         }
 
+        if (SecondarySTI) {
+          if (getMappingSymbolKind(MappingSymbols, Index) == 'a') {
+            STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
+            DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
+          } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') {
+            STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
+            DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
+          }
+        }
+
         // Disassemble a real instruction or a data when disassemble all is
         // provided
         MCInst Inst;
@@ -1459,6 +1493,22 @@ static void disassembleObject(const Obje
     report_error(Obj->getFileName(),
                  "no disassembler for target " + TripleName);
 
+  // If we have an ARM object file, we need a second disassembler, because
+  // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
+  // We use mapping symbols to switch between the two assemblers, where
+  // appropriate.
+  std::unique_ptr<MCDisassembler> SecondaryDisAsm;
+  std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
+  if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
+    if (STI->checkFeatures("+thumb-mode"))
+      Features.AddFeature("-thumb-mode");
+    else
+      Features.AddFeature("+thumb-mode");
+    SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
+                                                        Features.getString()));
+    SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
+  }
+
   std::unique_ptr<const MCInstrAnalysis> MIA(
       TheTarget->createMCInstrAnalysis(MII.get()));
 
@@ -1477,8 +1527,9 @@ static void disassembleObject(const Obje
     if (!IP->applyTargetSpecificCLOption(Opt))
       error("Unrecognized disassembler option: " + Opt);
 
-  disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), MIA.get(), IP.get(),
-                    STI.get(), PIP, SP, InlineRelocs);
+  disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
+                    MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
+                    SP, InlineRelocs);
 }
 
 void printRelocations(const ObjectFile *Obj) {




More information about the llvm-commits mailing list