[llvm] [MC][NFC] Allow MCInstrAnalysis to store state (PR #65479)
Job Noorman via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 2 01:19:12 PDT 2023
https://github.com/mtvec updated https://github.com/llvm/llvm-project/pull/65479
>From 72286701c48d722a7fbb43b4e1ca36b4f99ef2e1 Mon Sep 17 00:00:00 2001
From: Job Noorman <jnoorman at igalia.com>
Date: Wed, 6 Sep 2023 11:29:28 +0200
Subject: [PATCH 1/2] [MC][NFC] Allow MCInstrAnalysis to store state
Currently, all the analysis functions provided by `MCInstrAnalysis` work
on a single instruction. On some targets, this limits the kind of
instructions that can be successfully analyzed as common constructs may
need multiple instructions.
For example, a typical call sequence on RISC-V uses an auipc+jalr pair.
In order to analyse the jalr inside `evaluateBranch`, information about
the corresponding auipc is needed. Similarly, AArch64 uses adrp+ldr
pairs to access globals.
This patch proposes to add state to `MCInstrAnalysis` to support these
use cases. Two new virtual methods are added:
- `updateState`: takes an instruction and its address. This method
should be called by clients on every instruction and allows targets to
store whatever information they need to analyse future instructions.
- `resetState`: clears the state whenever it becomes irrelevant. Clients
could call this, for example, when starting to disassemble a new
function.
Note that the default implementations do nothing so this patch is NFC.
No actual state is stored inside `MCInstrAnalysis`; deciding the
structure of the state is left to the targets.
This patch also modifies llvm-objdump to use the new interface.
This patch is an alternative to D116677 and the idea of storing state in
`MCInstrAnalysis` was first discussed there.
---
llvm/include/llvm/MC/MCInstrAnalysis.h | 15 +++++++++++++++
llvm/tools/llvm-objdump/llvm-objdump.cpp | 21 ++++++++++++++++-----
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index c3c675c39c5590c..dac12af599e6f34 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -37,6 +37,21 @@ class MCInstrAnalysis {
MCInstrAnalysis(const MCInstrInfo *Info) : Info(Info) {}
virtual ~MCInstrAnalysis() = default;
+ /// Clear the internal state. See updateState for more information.
+ virtual void resetState() {}
+
+ /// Update internal state with \p Inst at \p Addr.
+ ///
+ /// For some types a analyses, inspecting a single instruction is not
+ /// sufficient. Some examples are auipc/jalr pairs on RISC-V or adrp/ldr pairs
+ /// on AArch64. To support inspecting multiple instructions, targets may keep
+ /// track of an internal state while analysing instructions. Clients should
+ /// call updateState for every instruction which allows later calls to one of
+ /// the analysis functions to take previous instructions into account.
+ /// Whenever state becomes irrelevant (e.g., when starting to disassemble a
+ /// new function), clients should call resetState to clear it.
+ virtual void updateState(const MCInst &Inst, uint64_t Addr) {}
+
virtual bool isBranch(const MCInst &Inst) const {
return Info->get(Inst.getOpcode()).isBranch();
}
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 96d74d6e2d5e865..8f6479d3c6e31e4 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -842,7 +842,7 @@ class DisassemblerTarget {
std::unique_ptr<const MCSubtargetInfo> SubtargetInfo;
std::shared_ptr<MCContext> Context;
std::unique_ptr<MCDisassembler> DisAsm;
- std::shared_ptr<const MCInstrAnalysis> InstrAnalysis;
+ std::shared_ptr<MCInstrAnalysis> InstrAnalysis;
std::shared_ptr<MCInstPrinter> InstPrinter;
PrettyPrinter *Printer;
@@ -1265,14 +1265,19 @@ collectBBAddrMapLabels(const std::unordered_map<uint64_t, BBAddrMap> &AddrToBBAd
}
}
-static void collectLocalBranchTargets(
- ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm,
- MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr,
- uint64_t Start, uint64_t End, std::unordered_map<uint64_t, std::string> &Labels) {
+static void
+collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
+ MCDisassembler *DisAsm, MCInstPrinter *IP,
+ const MCSubtargetInfo *STI, uint64_t SectionAddr,
+ uint64_t Start, uint64_t End,
+ std::unordered_map<uint64_t, std::string> &Labels) {
// So far only supports PowerPC and X86.
if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86())
return;
+ if (MIA)
+ MIA->resetState();
+
Labels.clear();
unsigned LabelCount = 0;
Start += SectionAddr;
@@ -1298,6 +1303,7 @@ static void collectLocalBranchTargets(
!Labels.count(Target) &&
!(STI->getTargetTriple().isPPC() && Target == Index))
Labels[Target] = ("L" + Twine(LabelCount++)).str();
+ MIA->updateState(Inst, Index);
}
Index += Size;
}
@@ -1939,6 +1945,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
BBAddrMapLabels);
}
+ if (DT->InstrAnalysis)
+ DT->InstrAnalysis->resetState();
+
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
@@ -2155,6 +2164,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
if (TargetOS == &CommentStream)
*TargetOS << "\n";
}
+
+ DT->InstrAnalysis->updateState(Inst, SectionAddr + Index);
}
}
>From 2eac9f59aca1c59d47a615084b2bfb839e9f1b0c Mon Sep 17 00:00:00 2001
From: Job Noorman <jnoorman at igalia.com>
Date: Mon, 2 Oct 2023 10:18:30 +0200
Subject: [PATCH 2/2] fixup! [MC][NFC] Allow MCInstrAnalysis to store state
Fix typo
---
llvm/include/llvm/MC/MCInstrAnalysis.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index dac12af599e6f34..e3ddf0b8b8939c9 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -42,7 +42,7 @@ class MCInstrAnalysis {
/// Update internal state with \p Inst at \p Addr.
///
- /// For some types a analyses, inspecting a single instruction is not
+ /// For some types of analyses, inspecting a single instruction is not
/// sufficient. Some examples are auipc/jalr pairs on RISC-V or adrp/ldr pairs
/// on AArch64. To support inspecting multiple instructions, targets may keep
/// track of an internal state while analysing instructions. Clients should
More information about the llvm-commits
mailing list