[llvm] [feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv (PR #109914)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 10 21:44:07 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: Arjun Patel (arjunUpatel)

<details>
<summary>Changes</summary>

Resolves #<!-- -->108469

---
Full diff: https://github.com/llvm/llvm-project/pull/109914.diff


9 Files Affected:

- (modified) llvm/include/llvm/MC/MCInstrAnalysis.h (+5) 
- (modified) llvm/lib/MC/MCInstrAnalysis.cpp (+6) 
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp (+103-10) 
- (added) llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar () 
- (added) llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage () 
- (added) llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg (+2) 
- (added) llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s (+111) 
- (added) llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s (+57) 
- (modified) llvm/tools/llvm-objdump/llvm-objdump.cpp (+6-5) 


``````````diff
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 63a4e02a92360..1f05e8546c3d1 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -182,6 +182,11 @@ class LLVM_ABI MCInstrAnalysis {
   evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                  uint64_t &Target) const;
 
+  /// Given an instruction that accesses a memory address, try to compute
+  /// the target address. Return true on success, and the address in \p Target.
+  virtual bool evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+                                   uint64_t Size, uint64_t &Target) const;
+
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
   virtual std::optional<uint64_t>
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b..1ae0c91a2590c 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -30,6 +30,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
   return false;
 }
 
+bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+                                          uint64_t Size,
+                                          uint64_t &Target) const {
+  return false;
+}
+
 std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
     const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
     uint64_t Size) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index f3b93f032588c..e52f5e50832c7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -29,7 +29,9 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include <bitset>
+#include <cstdint>
 
 #define GET_INSTRINFO_MC_DESC
 #define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -129,6 +131,7 @@ namespace {
 class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   int64_t GPRState[31] = {};
   std::bitset<31> GPRValidMask;
+  unsigned int ArchRegWidth;
 
   static bool isGPR(MCRegister Reg) {
     return Reg >= RISCV::X0 && Reg <= RISCV::X31;
@@ -165,8 +168,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
 public:
-  explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
-      : MCInstrAnalysis(Info) {}
+  explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info, unsigned int ArchRegWidth)
+      : MCInstrAnalysis(Info), ArchRegWidth(ArchRegWidth) {}
 
   void resetState() override { GPRValidMask.reset(); }
 
@@ -182,6 +185,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     }
 
     switch (Inst.getOpcode()) {
+    case RISCV::C_LUI:
+    case RISCV::LUI: {
+      setGPRState(Inst.getOperand(0).getReg(),
+                  SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+      break;
+    }
+    case RISCV::AUIPC: {
+      setGPRState(Inst.getOperand(0).getReg(),
+                  Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+      break;
+    }
     default: {
       // Clear the state of all defined registers for instructions that we don't
       // explicitly support.
@@ -193,10 +207,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
       }
       break;
     }
-    case RISCV::AUIPC:
-      setGPRState(Inst.getOperand(0).getReg(),
-                  Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
-      break;
     }
   }
 
@@ -234,6 +244,83 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     return false;
   }
 
+  bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                           uint64_t &Target) const override {
+    switch(Inst.getOpcode()) {
+    default:
+      return false;
+    case RISCV::C_ADDI:
+    case RISCV::ADDI: {
+      MCRegister Reg = Inst.getOperand(1).getReg();
+      auto TargetRegState = getGPRState(Reg);
+      if (TargetRegState && Reg != RISCV::X0) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        Target &= maskTrailingOnes<uint64_t>(ArchRegWidth);
+        return true;
+      }
+      break;
+    }
+    case RISCV::C_ADDIW:
+    case RISCV::ADDIW: {
+      MCRegister Reg = Inst.getOperand(1).getReg();
+      auto TargetRegState = getGPRState(Reg);
+      if (TargetRegState && Reg != RISCV::X0) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        Target = SignExtend64<32>(Target);
+        return true;
+      }
+      break;
+    }
+    case RISCV::LB:
+    case RISCV::LH:
+    case RISCV::LD:
+    case RISCV::LW:
+    case RISCV::LBU:
+    case RISCV::LHU:
+    case RISCV::LWU:
+    case RISCV::SB:
+    case RISCV::SH:
+    case RISCV::SW:
+    case RISCV::SD:
+    case RISCV::FLH:
+    case RISCV::FLW:
+    case RISCV::FLD:
+    case RISCV::FSH:
+    case RISCV::FSW:
+    case RISCV::FSD:
+    case RISCV::C_LD:
+    case RISCV::C_SD:
+    case RISCV::C_FLD:
+    case RISCV::C_FSD:
+    case RISCV::C_SW:
+    case RISCV::C_LW:
+    case RISCV::C_FSW:
+    case RISCV::C_FLW:
+    case RISCV::C_LBU:
+    case RISCV::C_LH:
+    case RISCV::C_LHU:
+    case RISCV::C_SB:
+    case RISCV::C_SH:
+    case RISCV::C_LWSP:
+    case RISCV::C_SWSP:
+    case RISCV::C_LDSP:
+    case RISCV::C_SDSP:
+    case RISCV::C_FLWSP:
+    case RISCV::C_FSWSP:
+    case RISCV::C_FLDSP:
+    case RISCV::C_FSDSP: {
+      MCRegister Reg = Inst.getOperand(1).getReg();
+      auto TargetRegState = getGPRState(Reg);
+      if (TargetRegState && Reg != RISCV::X0) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        return true;
+      }
+      break;
+    }
+    }
+    return false;
+  }
+
   bool isTerminator(const MCInst &Inst) const override {
     if (MCInstrAnalysis::isTerminator(Inst))
       return true;
@@ -327,8 +414,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
 
 } // end anonymous namespace
 
-static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
-  return new RISCVMCInstrAnalysis(Info);
+static MCInstrAnalysis *createRISCV32InstrAnalysis(const MCInstrInfo *Info) {
+  return new RISCVMCInstrAnalysis(Info, 32);
+}
+
+static MCInstrAnalysis *createRISCV64InstrAnalysis(const MCInstrInfo *Info) {
+  return new RISCVMCInstrAnalysis(Info, 64);
 }
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
@@ -344,12 +435,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
     TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer);
     TargetRegistry::RegisterObjectTargetStreamer(
         *T, createRISCVObjectTargetStreamer);
-    TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);
-
     // Register the asm target streamer.
     TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
     // Register the null target streamer.
     TargetRegistry::RegisterNullTargetStreamer(*T,
                                                createRISCVNullTargetStreamer);
   }
+  TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(),
+                                          createRISCV32InstrAnalysis);
+  TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(),
+                                          createRISCV64InstrAnalysis);
 }
diff --git a/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar
new file mode 100644
index 0000000000000..bc335bc24f88d
Binary files /dev/null and b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar differ
diff --git a/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage
new file mode 100644
index 0000000000000..08ba4f8846050
Binary files /dev/null and b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage differ
diff --git a/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
new file mode 100644
index 0000000000000..17351748513d9
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "RISCV" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s
new file mode 100644
index 0000000000000..ec32e53fce0ca
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s
@@ -0,0 +1,111 @@
+# RUN: llvm-objdump -d %p/Inputs/riscv-ar-coverage | FileCheck %s
+
+# CHECK: 0000000000001000 <_start>:
+# CHECK-NEXT:     1000: 00001517     	auipc	a0, 0x1
+# CHECK-NEXT:     1004: 00450513     	addi	a0, a0, 0x4 <target>
+# CHECK-NEXT:     1008: 00001517     	auipc	a0, 0x1
+# CHECK-NEXT:     100c: 1571         	addi	a0, a0, -0x4 <target>
+# CHECK-NEXT:     100e: 6509         	lui	a0, 0x2
+# CHECK-NEXT:     1010: 0045059b     	addiw	a1, a0, 0x4 <target>
+# CHECK-NEXT:     1014: 6509         	lui	a0, 0x2
+# CHECK-NEXT:     1016: 2511         	addiw	a0, a0, 0x4 <target>
+# CHECK-NEXT:     1018: 00102537     	lui	a0, 0x102
+# CHECK-NEXT:     101c: c50c         	sw	a1, 0x8(a0) <far_target>
+# CHECK-NEXT:     101e: 00102537     	lui	a0, 0x102
+# CHECK-NEXT:     1022: 4508         	lw	a0, 0x8(a0) <far_target>
+# CHECK-NEXT:     1024: 6509         	lui	a0, 0x2
+# CHECK-NEXT:     1026: 6585         	lui	a1, 0x1
+# CHECK-NEXT:     1028: 0306         	slli	t1, t1, 0x1
+# CHECK-NEXT:     102a: 0511         	addi	a0, a0, 0x4 <target>
+# CHECK-NEXT:     102c: 0505         	addi	a0, a0, 0x1
+# CHECK-NEXT:     102e: 00200037     	lui	zero, 0x200
+# CHECK-NEXT:     1032: 00a02423     	sw	a0, 0x8(zero)
+# CHECK-NEXT:     1036: 00101097     	auipc	ra, 0x101
+# CHECK-NEXT:     103a: fd6080e7     	jalr	-0x2a(ra) <func>
+# CHECK-NEXT:     103e: 00102437     	lui	s0, 0x102
+# CHECK-NEXT:     1042: 8800         	sb	s0, 0x0(s0) <target+0xffffc>
+# CHECK-NEXT:     1044: 00102137     	lui	sp, 0x102
+# CHECK-NEXT:     1048: 4522         	lw	a0, 0x8(sp) <far_target>
+
+.global _start
+.text
+
+# The core of the feature being added was address resolution for instruction
+# sequences where a register is populated by immediate values via two
+# separate instructions. First by an instruction that provides the upper bits
+# (auipc, lui ...) followed by another instruction for the lower bits (addi,
+# jalr, ld ...).
+
+
+_start:
+  # Test blocks 1-3 each focus on a certain starting instruction in a sequence,
+  # the one that provides the upper bits. The other half of the sequence is
+  # another instruction that provides the lower bits. The second instruction
+  # is arbitrarily chosen to increase code coverage.
+
+  # test block #1
+  lla a0, target     # addi
+  auipc a0, 0x1
+  c.addi a0, -0x4    # c.addi
+
+  # test block #2
+  c.lui a0, 0x2
+  addiw a1, a0, 0x4  # addiw
+  c.lui a0, 0x2
+  c.addiw a0, 0x4    # c.addiw
+
+  # test block #3
+  lui a0, 0x102
+  sw a1, 0x8(a0)     # sw
+  lui a0, 0x102
+  c.lw a0, 0x8(a0)   # lw
+
+  # Test block 4 tests instruction interleaving, essentially the code's
+  # ability to keep track of a valid sequence even if multiple other unrelated
+  # instructions separate the two
+
+  # test #4
+  lui a0, 0x2
+  lui a1, 0x1        # unrelated instruction
+  slli t1, t1, 0x1   # unrelated instruction
+  addi a0, a0, 0x4
+  addi a0, a0, 0x1
+
+  # Test 5 ensures that an instruction writing into the zero register does
+  # not trigger resolution because that register's value cannot change and
+  # the sequence is equivalent to never running the first instruction
+
+  # test #5
+  lui x0, 0x200
+  sw a0, 0x8(x0)
+
+  # Test 6 ensures that the newly added functionality is compatible with
+  # code that already worked for branch instructions
+
+  # test #6
+  call func
+
+  # test #7 zcb extension
+  lui x8, 0x102
+  # the immediate value for Zcb extension is heavily bounded, so we will relax
+  # the requirement of hitting one of the labels and focus on correctness of the
+  # resolution. This can be verified by looking at the source: The upper bits of
+  # lui make the far jump related to .skip 0x100000 and then 8 more bytes must be
+  # traversed before we hit far_target--.skip 0x4 and .word 1 in target. Adding 8
+  # to the address resolved for the instruction below yields exactly the desired label.
+  c.sb x8, 0(x8)
+
+  # test #8 stack based load/stores
+  lui sp, 0x102
+  c.lwsp a0, 0x8(sp)
+
+# these are the labels that the instructions above are expected to resolve to
+.section .data
+.skip 0x4
+target:
+  .word 1
+.skip 0x100000
+far_target:
+  .word 2
+func:
+  ret
diff --git a/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
new file mode 100644
index 0000000000000..49225519a62df
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
@@ -0,0 +1,57 @@
+# RUN: llvm-objdump -d %p/Inputs/riscv-ar | FileCheck %s
+
+# CHECK:   auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   ld a0, {{-?0x[0-9a-fA-F]+}}(a0) <ldata+0xfa4>
+# CHECK:   auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
+# CHECK:   auipc	a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
+# CHECK:   auipc	a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   lw a0, {{-?0x[0-9a-fA-F]+}}(a0) <gdata>
+# CHECK:   auipc	a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}} <ldata>
+# CHECK:   auipc	ra, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   jalr {{-?0x[0-9a-fA-F]+}}(ra) <func>
+# CHECK:   auipc	t1, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   jr {{-?0x[0-9a-fA-F]+}}(t1) <func>
+# CHECK:   lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x12242678>
+# CHECK:   lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addiw	a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x1438ad>
+# CHECK:   slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK:   addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <_start+0xfefff>
+
+.global _start
+.text
+_start:
+  la a0, gdata
+  lla a0, gdata
+  lla a0, gdata
+  lw a0, gdata
+  lla a0, ldata
+
+  call func
+  tail func
+
+  li a0, 0x12345678
+  li a0, 0x1234567890abcdef
+  li a0, 0x10000
+  li a0, 0xfffff
+
+  .skip 0x100000
+func:
+  ret
+
+ldata:
+  .int 0
+
+.data
+gdata:
+  .int 0
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 5ecb33375943f..b5d316e1e3857 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1520,8 +1520,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
     if (MIA) {
       if (Disassembled) {
         uint64_t Target;
-        bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
-        if (TargetKnown && (Target >= Start && Target < End) &&
+        bool BranchTargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+        if (BranchTargetKnown && (Target >= Start && Target < End) &&
             !Targets.count(Target)) {
           // On PowerPC and AIX, a function call is encoded as a branch to 0.
           // On other PowerPC platforms (ELF), a function call is encoded as
@@ -2356,8 +2356,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
             bool PrintTarget = DT->InstrAnalysis->evaluateBranch(
-                Inst, SectionAddr + Index, Size, Target);
-
+                                   Inst, SectionAddr + Index, Size, Target) ||
+                               DT->InstrAnalysis->evaluateInstruction(
+                                   Inst, SectionAddr + Index, Size, Target);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(
@@ -2430,7 +2431,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                   break;
               }
 
-              // Branch targets are printed just after the instructions.
+              // Branch and instruction targets are printed just after the instructions.
               // Print the labels corresponding to the target if there's any.
               bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
               bool LabelAvailable = AllLabels.count(Target);

``````````

</details>


https://github.com/llvm/llvm-project/pull/109914


More information about the llvm-commits mailing list