[llvm] 8710eff - [MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly

Tim Renouf via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 26 05:57:02 PDT 2021


Author: Tim Renouf
Date: 2021-04-26T13:56:36+01:00
New Revision: 8710eff6c3bab333bd270573fc349938b9a69dc0

URL: https://github.com/llvm/llvm-project/commit/8710eff6c3bab333bd270573fc349938b9a69dc0
DIFF: https://github.com/llvm/llvm-project/commit/8710eff6c3bab333bd270573fc349938b9a69dc0.diff

LOG: [MC][AMDGPU][llvm-objdump] Synthesized local labels in disassembly

1. Add an accessor function to MCSymbolizer to retrieve addresses
   referenced by a symbolizable operand, but not resolved to a symbol.
   That way, the caller can synthesize labels at those addresses and
   then retry disassembling the section.

2. Implement that in AMDGPU -- a failed symbol lookup results in the
   address being added to a vector returned by the new function.

3. Use that in llvm-objdump when using MCSymbolizer (which only happens
   on AMDGPU) and SymbolizeOperands is on.

Differential Revision: https://reviews.llvm.org/D101145

Change-Id: I19087c3bbfece64bad5a56ee88bcc9110d83989e

Added: 
    llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml

Modified: 
    llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
    llvm/tools/llvm-objdump/llvm-objdump.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index b7ca83a5f16c..b966106007db 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
 #define LLVM_MC_MCDISASSEMBLER_MCSYMBOLIZER_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include <algorithm>
 #include <cstdint>
@@ -75,6 +76,17 @@ class MCSymbolizer {
   virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                int64_t Value,
                                                uint64_t Address) = 0;
+
+  /// Get the MCSymbolizer's list of addresses that were referenced by
+  /// symbolizable operands but not resolved to a symbol. The caller (some
+  /// code that is disassembling a section or other chunk of code) would
+  /// typically create a synthetic label at each address and add them to its
+  /// list of symbols in the section, before creating a new MCSymbolizer with
+  /// the enhanced symbol list and retrying disassembling the section.
+  /// The returned array is unordered and may have duplicates.
+  /// The returned ArrayRef is invalidated by any subsequent call to a member
+  /// function of the MCSymbolizer object, or by its destruction.
+  virtual ArrayRef<uint64_t> getReferencedAddresses() const { return {}; }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 499c72409f54..7507f681678d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1834,6 +1834,8 @@ bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
     Inst.addOperand(MCOperand::createExpr(Add));
     return true;
   }
+  // Add to list of referenced addresses, so caller can synthesize a label.
+  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
   return false;
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 7c6d3afbe788..93e2f636bdab 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -183,6 +183,7 @@ class AMDGPUDisassembler : public MCDisassembler {
 class AMDGPUSymbolizer : public MCSymbolizer {
 private:
   void *DisInfo;
+  std::vector<uint64_t> ReferencedAddresses;
 
 public:
   AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo,
@@ -197,6 +198,10 @@ class AMDGPUSymbolizer : public MCSymbolizer {
   void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                        int64_t Value,
                                        uint64_t Address) override;
+
+  ArrayRef<uint64_t> getReferencedAddresses() const override {
+    return ReferencedAddresses;
+  }
 };
 
 } // end namespace llvm

diff  --git a/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml
new file mode 100644
index 000000000000..563864633902
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/AMDGPU/elf-disassemble-symbolize-operands.yaml
@@ -0,0 +1,89 @@
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \
+# RUN:   FileCheck %s
+
+## Expect to find the branch labels.
+# CHECK: <break_cond_is_arg>:
+# CHECK:     s_branch L1
+# CHECK: <L0>:
+# CHECK:     s_cbranch_execz L2
+# CHECK: <L1>:
+# CHECK:     s_branch L0
+# CHECK: <L2>:
+
+# I created this YAML starting with this LLVM IR:
+#
+#   define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
+#   entry:
+#     br label %loop
+#   loop:
+#     %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
+#     %tmp23 = add nuw i32 %tmp23phi, 1
+#     %tmp27 = icmp ult i32 %arg, %tmp23
+#     br i1 %tmp27, label %then, label %endif
+#   then:                                             ; preds = %loop
+#     call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
+#     br label %endif
+#   endif:                                             ; preds = %then, %loop
+#     br i1 %breakcond, label %loop, label %loopexit
+#   loopexit:
+#     ret void
+#   }
+#   
+#   declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
+#   
+#   attributes #0 = { nounwind writeonly }
+#
+# I compiled it to a relocatable ELF:
+#
+#   llc -march=amdgcn -mcpu=gfx1030 llvm/a.ll -filetype=obj -o a.elf
+#
+# then converted it to YAML:
+#
+#   obj2yaml a.elf
+#
+# then manually removed the BB0_1 etc local symbols.
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_AMDGPU
+  Flags:           [ EF_AMDGPU_MACH_AMDGCN_GFX1030 ]
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x4
+    Content:         00008CBF0000FDBB81020236810385BE800384BE8102847D6AC10689040082BF7E077E88058105817E047E8A080088BF0500887D7E060787070404886A3C87BEF7FF88BF000070E000000104F4FF82BF7E047E880000FDBB1E2080BE
+  - Name:            .AMDGPU.config
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         48B80000000000004CB800000000000060B800000000000004000000000000000800000000000000
+  - Name:            .note.GNU-stack
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+  - Name:            .note
+    Type:            SHT_NOTE
+    AddressAlign:    0x4
+    Notes:
+      - Name:            AMD
+        Desc:            616D6467636E2D756E6B6E6F776E2D6C696E75782D676E752D67667831303330
+        Type:            NT_FREEBSD_PROCSTAT_GROUPS
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .strtab
+      - Name:            .shstrtab
+      - Name:            .text
+      - Name:            .AMDGPU.config
+      - Name:            .note.GNU-stack
+      - Name:            .note
+      - Name:            .symtab
+Symbols:
+  - Name:            break_cond_is_arg
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Size:            0x5C
+...

diff  --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 57aac91baaf9..4b2ca6560677 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -972,6 +972,62 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
   }
 }
 
+// Create an MCSymbolizer for the target and add it to the MCDisassembler.
+// This is currently only used on AMDGPU, and assumes the format of the
+// void * argument passed to AMDGPU's createMCSymbolizer.
+static void addSymbolizer(MCContext &Ctx, const Target *Target,
+                          StringRef TripleName, MCDisassembler *DisAsm,
+                          uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
+                          SectionSymbolsTy &Symbols,
+                          std::vector<std::string *> &SynthesizedLabelNames) {
+
+  std::unique_ptr<MCRelocationInfo> RelInfo(
+      Target->createMCRelocationInfo(TripleName, Ctx));
+  if (!RelInfo)
+    return;
+  std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  MCSymbolizer *SymbolizerPtr = &*Symbolizer;
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+
+  if (!SymbolizeOperands)
+    return;
+
+  // Synthesize labels referenced by branch instructions by
+  // disassembling, discarding the output, and collecting the referenced
+  // addresses from the symbolizer.
+  for (size_t Index = 0; Index != Bytes.size();) {
+    MCInst Inst;
+    uint64_t Size;
+    DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
+                           nulls());
+    if (Size == 0)
+      Size = 1;
+    Index += Size;
+  }
+  ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
+  // Copy and sort to remove duplicates.
+  std::vector<uint64_t> LabelAddrs;
+  LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(),
+                    LabelAddrsRef.end());
+  llvm::sort(LabelAddrs);
+  LabelAddrs.resize(std::unique(LabelAddrs.begin(), LabelAddrs.end()) -
+                    LabelAddrs.begin());
+  // Add the labels.
+  for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
+    SynthesizedLabelNames.push_back(
+        new std::string((Twine("L") + Twine(LabelNum)).str()));
+    Symbols.push_back(SymbolInfoTy(
+        LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
+  }
+  llvm::stable_sort(Symbols);
+  // Recreate the symbolizer with the new symbols list.
+  RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
+  Symbolizer.reset(Target->createMCSymbolizer(
+      TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+  DisAsm->setSymbolizer(std::move(Symbolizer));
+}
+
 static StringRef getSegmentName(const MachOObjectFile *MachO,
                                 const SectionRef &Section) {
   if (MachO) {
@@ -1134,16 +1190,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
 
     llvm::sort(MappingSymbols);
 
+    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
+        unwrapOrError(Section.getContents(), Obj->getFileName()));
+
+    std::vector<std::string *> SynthesizedLabelNames;
     if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
       // AMDGPU disassembler uses symbolizer for printing labels
-      std::unique_ptr<MCRelocationInfo> RelInfo(
-        TheTarget->createMCRelocationInfo(TripleName, Ctx));
-      if (RelInfo) {
-        std::unique_ptr<MCSymbolizer> Symbolizer(
-          TheTarget->createMCSymbolizer(
-            TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
-        DisAsm->setSymbolizer(std::move(Symbolizer));
-      }
+      addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
+                    Symbols, SynthesizedLabelNames);
     }
 
     StringRef SegmentName = getSegmentName(MachO, Section);
@@ -1159,9 +1213,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
     SmallString<40> Comments;
     raw_svector_ostream CommentStream(Comments);
 
-    ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
-        unwrapOrError(Section.getContents(), Obj->getFileName()));
-
     uint64_t VMAAdjustment = 0;
     if (shouldAdjustVA(Section))
       VMAAdjustment = AdjustVMA;


        


More information about the llvm-commits mailing list