[llvm] [feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv (PR #109914)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 21:44:07 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities
Author: Arjun Patel (arjunUpatel)
<details>
<summary>Changes</summary>
Resolves #<!-- -->108469
---
Full diff: https://github.com/llvm/llvm-project/pull/109914.diff
9 Files Affected:
- (modified) llvm/include/llvm/MC/MCInstrAnalysis.h (+5)
- (modified) llvm/lib/MC/MCInstrAnalysis.cpp (+6)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp (+103-10)
- (added) llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar ()
- (added) llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage ()
- (added) llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg (+2)
- (added) llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s (+111)
- (added) llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s (+57)
- (modified) llvm/tools/llvm-objdump/llvm-objdump.cpp (+6-5)
``````````diff
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 63a4e02a92360..1f05e8546c3d1 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -182,6 +182,11 @@ class LLVM_ABI MCInstrAnalysis {
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+ /// Given an instruction that accesses a memory address, try to compute
+ /// the target address. Return true on success, and the address in \p Target.
+ virtual bool evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const;
+
/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
virtual std::optional<uint64_t>
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b..1ae0c91a2590c 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -30,6 +30,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
+bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size,
+ uint64_t &Target) const {
+ return false;
+}
+
std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index f3b93f032588c..e52f5e50832c7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -29,7 +29,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include <bitset>
+#include <cstdint>
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -129,6 +131,7 @@ namespace {
class RISCVMCInstrAnalysis : public MCInstrAnalysis {
int64_t GPRState[31] = {};
std::bitset<31> GPRValidMask;
+ unsigned int ArchRegWidth;
static bool isGPR(MCRegister Reg) {
return Reg >= RISCV::X0 && Reg <= RISCV::X31;
@@ -165,8 +168,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
public:
- explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
- : MCInstrAnalysis(Info) {}
+ explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info, unsigned int ArchRegWidth)
+ : MCInstrAnalysis(Info), ArchRegWidth(ArchRegWidth) {}
void resetState() override { GPRValidMask.reset(); }
@@ -182,6 +185,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
switch (Inst.getOpcode()) {
+ case RISCV::C_LUI:
+ case RISCV::LUI: {
+ setGPRState(Inst.getOperand(0).getReg(),
+ SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+ break;
+ }
+ case RISCV::AUIPC: {
+ setGPRState(Inst.getOperand(0).getReg(),
+ Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+ break;
+ }
default: {
// Clear the state of all defined registers for instructions that we don't
// explicitly support.
@@ -193,10 +207,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
break;
}
- case RISCV::AUIPC:
- setGPRState(Inst.getOperand(0).getReg(),
- Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
- break;
}
}
@@ -234,6 +244,83 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}
+ bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ switch(Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::C_ADDI:
+ case RISCV::ADDI: {
+ MCRegister Reg = Inst.getOperand(1).getReg();
+ auto TargetRegState = getGPRState(Reg);
+ if (TargetRegState && Reg != RISCV::X0) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ Target &= maskTrailingOnes<uint64_t>(ArchRegWidth);
+ return true;
+ }
+ break;
+ }
+ case RISCV::C_ADDIW:
+ case RISCV::ADDIW: {
+ MCRegister Reg = Inst.getOperand(1).getReg();
+ auto TargetRegState = getGPRState(Reg);
+ if (TargetRegState && Reg != RISCV::X0) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ Target = SignExtend64<32>(Target);
+ return true;
+ }
+ break;
+ }
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LD:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ case RISCV::C_LD:
+ case RISCV::C_SD:
+ case RISCV::C_FLD:
+ case RISCV::C_FSD:
+ case RISCV::C_SW:
+ case RISCV::C_LW:
+ case RISCV::C_FSW:
+ case RISCV::C_FLW:
+ case RISCV::C_LBU:
+ case RISCV::C_LH:
+ case RISCV::C_LHU:
+ case RISCV::C_SB:
+ case RISCV::C_SH:
+ case RISCV::C_LWSP:
+ case RISCV::C_SWSP:
+ case RISCV::C_LDSP:
+ case RISCV::C_SDSP:
+ case RISCV::C_FLWSP:
+ case RISCV::C_FSWSP:
+ case RISCV::C_FLDSP:
+ case RISCV::C_FSDSP: {
+ MCRegister Reg = Inst.getOperand(1).getReg();
+ auto TargetRegState = getGPRState(Reg);
+ if (TargetRegState && Reg != RISCV::X0) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+ break;
+ }
+ }
+ return false;
+ }
+
bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
@@ -327,8 +414,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
} // end anonymous namespace
-static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
- return new RISCVMCInstrAnalysis(Info);
+static MCInstrAnalysis *createRISCV32InstrAnalysis(const MCInstrInfo *Info) {
+ return new RISCVMCInstrAnalysis(Info, 32);
+}
+
+static MCInstrAnalysis *createRISCV64InstrAnalysis(const MCInstrInfo *Info) {
+ return new RISCVMCInstrAnalysis(Info, 64);
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
@@ -344,12 +435,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer);
TargetRegistry::RegisterObjectTargetStreamer(
*T, createRISCVObjectTargetStreamer);
- TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);
-
// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
// Register the null target streamer.
TargetRegistry::RegisterNullTargetStreamer(*T,
createRISCVNullTargetStreamer);
}
+ TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(),
+ createRISCV32InstrAnalysis);
+ TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(),
+ createRISCV64InstrAnalysis);
}
diff --git a/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar
new file mode 100644
index 0000000000000..bc335bc24f88d
Binary files /dev/null and b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar differ
diff --git a/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage
new file mode 100644
index 0000000000000..08ba4f8846050
Binary files /dev/null and b/llvm/test/tools/llvm-objdump/RISCV/Inputs/riscv-ar-coverage differ
diff --git a/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
new file mode 100644
index 0000000000000..17351748513d9
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "RISCV" in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s
new file mode 100644
index 0000000000000..ec32e53fce0ca
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar-coverage.s
@@ -0,0 +1,111 @@
+# RUN: llvm-objdump -d %p/Inputs/riscv-ar-coverage | FileCheck %s
+
+# CHECK: 0000000000001000 <_start>:
+# CHECK-NEXT: 1000: 00001517 auipc a0, 0x1
+# CHECK-NEXT: 1004: 00450513 addi a0, a0, 0x4 <target>
+# CHECK-NEXT: 1008: 00001517 auipc a0, 0x1
+# CHECK-NEXT: 100c: 1571 addi a0, a0, -0x4 <target>
+# CHECK-NEXT: 100e: 6509 lui a0, 0x2
+# CHECK-NEXT: 1010: 0045059b addiw a1, a0, 0x4 <target>
+# CHECK-NEXT: 1014: 6509 lui a0, 0x2
+# CHECK-NEXT: 1016: 2511 addiw a0, a0, 0x4 <target>
+# CHECK-NEXT: 1018: 00102537 lui a0, 0x102
+# CHECK-NEXT: 101c: c50c sw a1, 0x8(a0) <far_target>
+# CHECK-NEXT: 101e: 00102537 lui a0, 0x102
+# CHECK-NEXT: 1022: 4508 lw a0, 0x8(a0) <far_target>
+# CHECK-NEXT: 1024: 6509 lui a0, 0x2
+# CHECK-NEXT: 1026: 6585 lui a1, 0x1
+# CHECK-NEXT: 1028: 0306 slli t1, t1, 0x1
+# CHECK-NEXT: 102a: 0511 addi a0, a0, 0x4 <target>
+# CHECK-NEXT: 102c: 0505 addi a0, a0, 0x1
+# CHECK-NEXT: 102e: 00200037 lui zero, 0x200
+# CHECK-NEXT: 1032: 00a02423 sw a0, 0x8(zero)
+# CHECK-NEXT: 1036: 00101097 auipc ra, 0x101
+# CHECK-NEXT: 103a: fd6080e7 jalr -0x2a(ra) <func>
+# CHECK-NEXT: 103e: 00102437 lui s0, 0x102
+# CHECK-NEXT: 1042: 8800 sb s0, 0x0(s0) <target+0xffffc>
+# CHECK-NEXT: 1044: 00102137 lui sp, 0x102
+# CHECK-NEXT: 1048: 4522 lw a0, 0x8(sp) <far_target>
+
+.global _start
+.text
+
+# The core of the feature being added is address resolution for instruction
+# sequences where a register is populated by immediate values via two
+# separate instructions: first an instruction that provides the upper bits
+# (auipc, lui ...), followed by another instruction that provides the lower
+# bits (addi, jalr, ld ...).
+
+
+_start:
+ # Test blocks 1-3 each focus on a particular starting instruction of a
+ # sequence, i.e. the one that provides the upper bits. The other half of the
+ # sequence is an instruction that provides the lower bits; it is chosen
+ # arbitrarily to increase code coverage.
+
+ # test block #1
+ lla a0, target # addi
+ auipc a0, 0x1
+ c.addi a0, -0x4 # c.addi
+
+ # test block #2
+ c.lui a0, 0x2
+ addiw a1, a0, 0x4 # addiw
+ c.lui a0, 0x2
+ c.addiw a0, 0x4 # c.addiw
+
+ # test block #3
+ lui a0, 0x102
+ sw a1, 0x8(a0) # sw
+ lui a0, 0x102
+ c.lw a0, 0x8(a0) # lw
+
+ # Test block 4 tests instruction interleaving, essentially the code's
+ # ability to keep track of a valid sequence even if multiple other unrelated
+ # instructions separate the two
+
+ # test #4
+ lui a0, 0x2
+ lui a1, 0x1 # unrelated instruction
+ slli t1, t1, 0x1 # unrelated instruction
+ addi a0, a0, 0x4
+ addi a0, a0, 0x1
+
+ # Test 5 ensures that an instruction writing into the zero register does
+ # not trigger resolution because that register's value cannot change and
+ # the sequence is equivalent to never running the first instruction
+
+ # test #5
+ lui x0, 0x200
+ sw a0, 0x8(x0)
+
+ # Test 6 ensures that the newly added functionality is compatible with
+ # code that already worked for branch instructions
+
+ # test #6
+ call func
+
+ # test #7 zcb extension
+ lui x8, 0x102
+ # The immediate value for the Zcb extension is heavily bounded, so we relax
+ # the requirement of hitting one of the labels and focus on correctness of the
+ # resolution. This can be verified by looking at the source: the upper bits of
+ # lui cover the far jump introduced by .skip 0x100000, and 8 more bytes (.skip
+ # 0x4 and .word 1 in target) must be traversed before we hit far_target. Adding
+ # 8 to the address resolved for the instruction below yields the desired label.
+ c.sb x8, 0(x8)
+
+ # test #8 stack based load/stores
+ lui sp, 0x102
+ c.lwsp a0, 0x8(sp)
+
+# These are the labels that the instructions above are expected to resolve to.
+.section .data
+.skip 0x4
+target:
+ .word 1
+.skip 0x100000
+far_target:
+ .word 2
+func:
+ ret
diff --git a/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
new file mode 100644
index 0000000000000..49225519a62df
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
@@ -0,0 +1,57 @@
+# RUN: llvm-objdump -d %p/Inputs/riscv-ar | FileCheck %s
+
+# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: ld a0, {{-?0x[0-9a-fA-F]+}}(a0) <ldata+0xfa4>
+# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
+# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
+# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: lw a0, {{-?0x[0-9a-fA-F]+}}(a0) <gdata>
+# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <ldata>
+# CHECK: auipc ra, {{-?0x[0-9a-fA-F]+}}
+# CHECK: jalr {{-?0x[0-9a-fA-F]+}}(ra) <func>
+# CHECK: auipc t1, {{-?0x[0-9a-fA-F]+}}
+# CHECK: jr {{-?0x[0-9a-fA-F]+}}(t1) <func>
+# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x12242678>
+# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x1438ad>
+# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
+# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <_start+0xfefff>
+
+.global _start
+.text
+_start:
+ la a0, gdata
+ lla a0, gdata
+ lla a0, gdata
+ lw a0, gdata
+ lla a0, ldata
+
+ call func
+ tail func
+
+ li a0, 0x12345678
+ li a0, 0x1234567890abcdef
+ li a0, 0x10000
+ li a0, 0xfffff
+
+ .skip 0x100000
+func:
+ ret
+
+ldata:
+ .int 0
+
+.data
+gdata:
+ .int 0
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 5ecb33375943f..b5d316e1e3857 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1520,8 +1520,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
if (MIA) {
if (Disassembled) {
uint64_t Target;
- bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
- if (TargetKnown && (Target >= Start && Target < End) &&
+ bool BranchTargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ if (BranchTargetKnown && (Target >= Start && Target < End) &&
!Targets.count(Target)) {
// On PowerPC and AIX, a function call is encoded as a branch to 0.
// On other PowerPC platforms (ELF), a function call is encoded as
@@ -2356,8 +2356,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
llvm::raw_ostream *TargetOS = &FOS;
uint64_t Target;
bool PrintTarget = DT->InstrAnalysis->evaluateBranch(
- Inst, SectionAddr + Index, Size, Target);
-
+ Inst, SectionAddr + Index, Size, Target) ||
+ DT->InstrAnalysis->evaluateInstruction(
+ Inst, SectionAddr + Index, Size, Target);
if (!PrintTarget) {
if (std::optional<uint64_t> MaybeTarget =
DT->InstrAnalysis->evaluateMemoryOperandAddress(
@@ -2430,7 +2431,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
break;
}
- // Branch targets are printed just after the instructions.
+ // Branch and instruction targets are printed just after the instructions.
// Print the labels corresponding to the target if there's any.
bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
bool LabelAvailable = AllLabels.count(Target);
``````````
</details>
https://github.com/llvm/llvm-project/pull/109914
More information about the llvm-commits
mailing list