[llvm] [feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv #108469 (PR #109914)
Arjun Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 23:18:08 PDT 2024
https://github.com/arjunUpatel created https://github.com/llvm/llvm-project/pull/109914
None
>From e16b61af1f30f49e2d2dc2296c18bef49e819255 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 22 Sep 2024 15:34:23 -0400
Subject: [PATCH 1/3] Massage in RegisterState struct in existing codebase
---
.../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 62 ++++++++++++++++---
1 file changed, 52 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a0dc9d93c84b8d..93d567187a5db6 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -1,3 +1,4 @@
+
//===-- RISCVMCTargetDesc.cpp - RISC-V Target Descriptions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -122,8 +123,26 @@ static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) {
namespace {
+// one way to implement this change is to keep the branching and
+// instruction analysis separate. in this approach, simply keeping track of
+// auipc and lui separately would be enough
+
+// in the other approach, we can merge the two by editing the RISCVMCInstrAnalysis class
+// this can be done by keeping reack of the general purpose registers
+// with respect to the last instruction that operated on them. this is basically
+// what evaluate branch is doing (looking for auipc (and only auipc because the state is only
+// updated when auipc is seen) and thena subsequent jalr)
+
class RISCVMCInstrAnalysis : public MCInstrAnalysis {
- int64_t GPRState[31] = {};
+
+ struct RegisterState {
+ int64_t Value;
+ // keep track of the last opcode that influenced
+ // the current value of the register
+ unsigned InlfuOpcode;
+ };
+
+ RegisterState GPRState[31] = {};
std::bitset<31> GPRValidMask;
static bool isGPR(MCRegister Reg) {
@@ -135,21 +154,22 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return Reg - RISCV::X1;
}
- void setGPRState(MCRegister Reg, std::optional<int64_t> Value) {
+ void setRegisterState(MCRegister Reg, std::optional<int64_t> Value, std::optional<unsigned> Opcode) {
if (Reg == RISCV::X0)
return;
auto Index = getRegIndex(Reg);
- if (Value) {
- GPRState[Index] = *Value;
+ if (Value && Opcode) {
+ GPRState[Index].Value = *Value;
+ GPRState[Index].InfluOpcode = Opcode;
GPRValidMask.set(Index);
} else {
GPRValidMask.reset(Index);
}
}
- std::optional<int64_t> getGPRState(MCRegister Reg) const {
+ std::optional<RegisterState> getRegisterState(McRegister Reg) const {
if (Reg == RISCV::X0)
return 0;
@@ -185,13 +205,13 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
for (unsigned I = 0; I < NumDefs; ++I) {
auto DefReg = Inst.getOperand(I).getReg();
if (isGPR(DefReg))
- setGPRState(DefReg, std::nullopt);
+ setRegisterState(DefReg, std::nullopt, std::nullopt);
}
break;
}
case RISCV::AUIPC:
- setGPRState(Inst.getOperand(0).getReg(),
- Addr + (Inst.getOperand(1).getImm() << 12));
+ setRegisterState(Inst.getOperand(0).getReg(),
+ Addr + (Inst.getOperand(1).getImm() << 12), RISCV::AUIPC);
break;
}
}
@@ -219,8 +239,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
if (Inst.getOpcode() == RISCV::JALR) {
- if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
- Target = *TargetRegState + Inst.getOperand(2).getImm();
+ if (auto TargetRegState = getRegisterState(Inst.getOperand(1).getReg())) {
+ Target = TargetRegState->Value + Inst.getOperand(2).getImm();
return true;
}
@@ -230,6 +250,26 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}
+ // keep track of what kind of instruction was used previously
+ // once we are concretely able to keep track of what kind of instruction was used previously,
+ // we just have to look at the corresponding
+ // how to output the analysis of the instructions?
+
+ // listen for instruction that gets the upper immediate. cache the value of the upper immediate
+ // now go until the next instruction that shows up with the lower bits and writes to the same register
+ // keep track of all the changes to that register in between
+
+
+ // bool evaluateInst(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ // uint64_t &Target) const override {
+ // if we see a function call a branch or a jump, then we cannot guarantee the
+ // location of the program counter since it can be edited after the jump, so
+ // we should discard the cache
+
+ // if the first check passes, then we should wait and look for all the instructions
+ // that will provide the lower 12 bits
+ }
+
bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
@@ -359,3 +399,5 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
createRISCVNullTargetStreamer);
}
}
+
+
>From 9e07898959faca5bdaab30815ed196e88735f960 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 22 Sep 2024 15:59:54 -0400
Subject: [PATCH 2/3] include skeleton + pseudocode
---
llvm/include/llvm/MC/MCInstrAnalysis.h | 3 +++
llvm/lib/MC/MCInstrAnalysis.cpp | 6 ++++++
2 files changed, 9 insertions(+)
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index b571791c518da8..e7c08d93282df3 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -180,6 +180,9 @@ class MCInstrAnalysis {
virtual bool
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+ virtual bool
+ evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const;
/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b3..ae5d8817655d2e 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -30,6 +30,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
+bool MCInstrAnalysis::evaluateInstruction(const MCInst & /*Inst*/, uint64_t /*Addr*/,
+ uint64_t /*Size*/,
+ uint64_t & /*Target*/) const {
+ return false;
+}
+
std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
>From a46dd9231d3a175f4f4a6e28cc6146cc5f4ec6eb Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Wed, 25 Sep 2024 00:59:45 -0400
Subject: [PATCH 3/3] added evaluateInstruction method. needs tests
---
llvm/include/llvm/MC/MCInstrAnalysis.h | 4 ++
llvm/lib/MC/MCInstrAnalysis.cpp | 7 ++
.../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 69 +++++++++++++++----
llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +-
4 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index b571791c518da8..7a0085268727a2 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -181,6 +181,10 @@ class MCInstrAnalysis {
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;
+ virtual bool
+ evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const;
+
/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
virtual std::optional<uint64_t>
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b3..de1319698f966c 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/APInt.h"
#include <cstdint>
+#include "MCInstrAnalysis.h"
namespace llvm {
class MCSubtargetInfo;
@@ -30,6 +31,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
+bool llvm::MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
+ uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const {
+ return false;
+}
+
std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a0dc9d93c84b8d..73d975bf3d6a39 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -178,21 +178,24 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
switch (Inst.getOpcode()) {
- default: {
- // Clear the state of all defined registers for instructions that we don't
- // explicitly support.
- auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
- for (unsigned I = 0; I < NumDefs; ++I) {
- auto DefReg = Inst.getOperand(I).getReg();
- if (isGPR(DefReg))
- setGPRState(DefReg, std::nullopt);
+ default: {
+ // Clear the state of all defined registers for instructions that we don't
+ // explicitly support.
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ auto DefReg = Inst.getOperand(I).getReg();
+ if (isGPR(DefReg))
+ setGPRState(DefReg, std::nullopt);
+ }
+ break;
+ }
+ case RISCV::AUIPC:
+ case RISCV::LUI:
+ {
+ setGPRState(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getImm() << 12);
+ break;
}
- break;
- }
- case RISCV::AUIPC:
- setGPRState(Inst.getOperand(0).getReg(),
- Addr + (Inst.getOperand(1).getImm() << 12));
- break;
}
}
@@ -230,6 +233,44 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}
+ bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ switch(Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::ADDI:
+ case RISCV::ADDIW: {
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+ break;
+ }
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::FSW:
+ case RISCV::SD:
+ case RISCV::FSD: {
+ int64_t Offset = Inst.getOperand(2).getImm();
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
+ Target = *TargetRegState + Offset;
+ else
+ Target = Offset;
+ return true;
+ }
+ }
+ }
+
bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index b69d14b4e7609a..915394fc3b3724 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1513,7 +1513,7 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
if (MIA) {
if (Disassembled) {
uint64_t Target;
- bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+ bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target) || MIA->evaluateInstruction(Inst, Index, Size, Target);
if (TargetKnown && (Target >= Start && Target < End) &&
!Labels.count(Target)) {
// On PowerPC and AIX, a function call is encoded as a branch to 0.
More information about the llvm-commits
mailing list