[llvm] [feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv (PR #109914)
Arjun Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 13 19:25:15 PDT 2024
https://github.com/arjunUpatel updated https://github.com/llvm/llvm-project/pull/109914
>From a46dd9231d3a175f4f4a6e28cc6146cc5f4ec6eb Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Wed, 25 Sep 2024 00:59:45 -0400
Subject: [PATCH 1/3] added evaluateInstruction method. needs tests
---
llvm/include/llvm/MC/MCInstrAnalysis.h | 4 ++
llvm/lib/MC/MCInstrAnalysis.cpp | 7 ++
.../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 69 +++++++++++++++----
llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +-
4 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index b571791c518da8..7a0085268727a2 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -181,6 +181,10 @@ class MCInstrAnalysis {
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+ virtual bool
+ evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const;
+
/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
virtual std::optional<uint64_t>
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b3..de1319698f966c 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/APInt.h"
#include <cstdint>
+#include "MCInstrAnalysis.h"
namespace llvm {
class MCSubtargetInfo;
@@ -30,6 +31,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
+bool llvm::MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
+ uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const {
+ return false;
+}
+
std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a0dc9d93c84b8d..73d975bf3d6a39 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -178,21 +178,24 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
switch (Inst.getOpcode()) {
- default: {
- // Clear the state of all defined registers for instructions that we don't
- // explicitly support.
- auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
- for (unsigned I = 0; I < NumDefs; ++I) {
- auto DefReg = Inst.getOperand(I).getReg();
- if (isGPR(DefReg))
- setGPRState(DefReg, std::nullopt);
+ default: {
+ // Clear the state of all defined registers for instructions that we don't
+ // explicitly support.
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ auto DefReg = Inst.getOperand(I).getReg();
+ if (isGPR(DefReg))
+ setGPRState(DefReg, std::nullopt);
+ }
+ break;
+ }
+ case RISCV::AUIPC:
+ case RISCV::LUI:
+ {
+ setGPRState(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getImm() << 12);
+ break;
}
- break;
- }
- case RISCV::AUIPC:
- setGPRState(Inst.getOperand(0).getReg(),
- Addr + (Inst.getOperand(1).getImm() << 12));
- break;
}
}
@@ -230,6 +233,44 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}
+ bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ switch(Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::ADDI:
+ case RISCV::ADDIW: {
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+ break;
+ }
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::SD:
+ case RISCV::FSD: {
+ int64_t Offset = Inst.getOperand(2).getImm();
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
+ Target = *TargetRegState + Offset;
+ else
+ Target = Offset;
+ return true;
+ }
+ }
+ }
+
bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index b69d14b4e7609a..915394fc3b3724 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1513,7 +1513,7 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
if (MIA) {
if (Disassembled) {
uint64_t Target;
- bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target) || MIA->evaluateInstruction(Inst, Index, Size, Target);
if (TargetKnown && (Target >= Start && Target < End) &&
!Labels.count(Target)) {
// On PowerPC and AIX, a function call is encoded as a branch to 0.
>From 30dd584dc7ec6e4c162aecd3e0ddcb906b9d6374 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Wed, 25 Sep 2024 03:26:15 -0400
Subject: [PATCH 2/3] dealing with git
---
llvm/lib/MC/MCInstrAnalysis.cpp | 3 +--
llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 9 ++++++---
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index de1319698f966c..49acfbbfcc6598 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/APInt.h"
#include <cstdint>
-#include "MCInstrAnalysis.h"
namespace llvm {
class MCSubtargetInfo;
@@ -31,7 +30,7 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
-bool llvm::MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
+bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
uint64_t Addr, uint64_t Size,
uint64_t &Target) const {
return false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 73d975bf3d6a39..e431fa1014d5e0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -189,9 +189,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
break;
}
- case RISCV::AUIPC:
- case RISCV::LUI:
- {
+ case RISCV::AUIPC: {
+ setGPRState(Inst.getOperand(0).getReg(),
+ Addr + (Inst.getOperand(1).getImm() << 12));
+ break;
+ }
+ case RISCV::LUI: {
setGPRState(Inst.getOperand(0).getReg(),
Inst.getOperand(1).getImm() << 12);
break;
>From fce36f154b413dbfa2c4a8da7d6b9ee7211180f8 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 13 Oct 2024 22:24:56 -0400
Subject: [PATCH 3/3] sign extend relevant immediates
---
.../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 51 +++++++++++++------
llvm/tools/llvm-objdump/llvm-objdump.cpp | 22 +++++---
2 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index e431fa1014d5e0..6325b5bd594dea 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -31,7 +31,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include <bitset>
+#include <cstdint>
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -178,6 +180,24 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
switch (Inst.getOpcode()) {
+ case RISCV::LUI: {
+ setGPRState(Inst.getOperand(0).getReg(),
+ SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+ break;
+ }
+ case RISCV::C_LUI: {
+ MCRegister Reg = Inst.getOperand(0).getReg();
+ if (Reg == RISCV::X2)
+ break;
+ setGPRState(Reg, SignExtend64<17>(Inst.getOperand(1).getImm() << 12));
+ break;
+
+ }
+ case RISCV::AUIPC: {
+ setGPRState(Inst.getOperand(0).getReg(),
+ Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+ break;
+ }
default: {
// Clear the state of all defined registers for instructions that we don't
// explicitly support.
@@ -189,16 +209,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
break;
}
- case RISCV::AUIPC: {
- setGPRState(Inst.getOperand(0).getReg(),
- Addr + (Inst.getOperand(1).getImm() << 12));
- break;
- }
- case RISCV::LUI: {
- setGPRState(Inst.getOperand(0).getReg(),
- Inst.getOperand(1).getImm() << 12);
- break;
- }
}
}
@@ -244,34 +254,43 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
case RISCV::ADDI:
case RISCV::ADDIW: {
if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
- Target = *TargetRegState + Inst.getOperand(2).getImm();
+ // TODO: Figure out ways to find the actual value of XLEN during analysis
+ int XLEN = 32;
+ uint64_t mask = ~(0) >> XLEN;
+ Target = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
+ Target &= mask;
+ Target = SignExtend64<32>(Target);
return true;
}
break;
}
case RISCV::LB:
case RISCV::LH:
+ case RISCV::LD:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLW:
- case RISCV::FLD:
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
- case RISCV::FSW:
case RISCV::SD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::FSH:
+ case RISCV::FSW:
case RISCV::FSD: {
- int64_t Offset = Inst.getOperand(2).getImm();
+ int64_t Offset = SignExtend64<12>(Inst.getOperand(2).getImm());
if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
Target = *TargetRegState + Offset;
else
Target = Offset;
return true;
}
+ // TODO: Add cases for compressed load and store instructions
}
+ return false;
}
bool isTerminator(const MCInst &Inst) const override {
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 915394fc3b3724..f2199f8e5b486a 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1513,9 +1513,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
if (MIA) {
if (Disassembled) {
uint64_t Target;
- bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target) || MIA->evaluateInstruction(Inst, Index, Size, Target);
- if (TargetKnown && (Target >= Start && Target < End) &&
- !Labels.count(Target)) {
+ bool BranchTargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ if (BranchTargetKnown && (Target >= Start && Target < End)) {
// On PowerPC and AIX, a function call is encoded as a branch to 0.
// On other PowerPC platforms (ELF), a function call is encoded as
// a branch to self. Do not add a label for these cases.
@@ -2322,8 +2321,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
if (Disassembled && DT->InstrAnalysis) {
llvm::raw_ostream *TargetOS = &FOS;
uint64_t Target;
- bool PrintTarget = DT->InstrAnalysis->evaluateBranch(
- Inst, SectionAddr + Index, Size, Target);
+ bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) ||
+ DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
if (!PrintTarget) {
if (std::optional<uint64_t> MaybeTarget =
@@ -2397,7 +2396,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
break;
}
- // Branch targets are printed just after the instructions.
+ // Branch and instruction targets are printed just after the instructions.
// Print the labels corresponding to the target if there's any.
bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
bool LabelAvailable = AllLabels.count(Target);
@@ -2478,7 +2477,16 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
<< ">";
} else if (LabelAvailable) {
*TargetOS << " <" << AllLabels[Target] << ">";
- }
+ }
+ // else {
+ // this case is needed because the first load in the test assembly
+ // did not have any symbols in the section nor was it caught by any of
+ // else if cases. this warrented a case where the address is printed realtive
+ // to the target section. Since no symbol was found, there is no need to handle
+ // relocations
+ // *TargetOS << " <" <<
+
+ // }
// By convention, each record in the comment stream should be
// terminated.
if (TargetOS == &CommentStream)
More information about the llvm-commits
mailing list