[llvm] [BOLT] Add symbolizer for AArch64 disassembler. NFCI (PR #127969)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 17:18:18 PST 2025
https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/127969
>From 74cec63ec3a7fd8a5e6562a99c2e86136137a656 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Tue, 18 Feb 2025 15:52:53 -0800
Subject: [PATCH 1/3] [BOLT] Add symbolizer for AArch64 disassembler. NFCI
Add AArch64MCSymbolizer that symbolizes MCInst operands during
disassembly. The symbolization was previously done in
`BinaryFunction::disassemble()`, but it is also required by
`scanExternalRefs()` for "lite" mode functionality. Hence, similar to
x86, I've implemented the symbolizer interface that uses BinaryFunction
relocations to properly create instruction operands. I expect the result
of the disassembly to be identical after the change.
So far, the only quirk of AArch64 disassembler that I found is that it
marks `ldr` instructions as branch by setting `IsBranch` parameter to
true. Ignore the parameter and rely on `MCPlusBuilder` interface
instead.
---
bolt/lib/Core/BinaryFunction.cpp | 21 +----
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 7 ++
.../Target/AArch64/AArch64MCSymbolizer.cpp | 86 +++++++++++++++++++
bolt/lib/Target/AArch64/AArch64MCSymbolizer.h | 44 ++++++++++
bolt/lib/Target/AArch64/CMakeLists.txt | 1 +
5 files changed, 139 insertions(+), 20 deletions(-)
create mode 100644 bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
create mode 100644 bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index bc45caf3ec8b7..f64f2a3591061 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1434,9 +1434,8 @@ Error BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
- } else if (BC.isAArch64() || BC.isRISCV()) {
+ } else if (BC.isRISCV()) {
// Check if there's a relocation associated with this instruction.
- bool UsedReloc = false;
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
@@ -1461,24 +1460,6 @@ Error BinaryFunction::disassemble() {
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
-
- // For aarch64, if we replaced an immediate with a symbol from a
- // relocation, we mark it so we do not try to further process a
- // pc-relative operand. All we need is the symbol.
- UsedReloc = true;
- }
-
- if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
- if (auto NewE = handleErrors(
- handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
- [&](const BOLTError &E) -> Error {
- if (E.isFatal())
- return Error(std::make_unique<BOLTError>(std::move(E)));
- if (!E.getMessage().empty())
- E.log(BC.errs());
- return Error::success();
- }))
- return Error(std::move(NewE));
}
}
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 4b21ff719b3ab..60efa25c54ffa 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64MCSymbolizer.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
@@ -133,6 +134,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
+ std::unique_ptr<MCSymbolizer>
+ createTargetSymbolizer(BinaryFunction &Function,
+ bool CreateNewSymbols) const override {
+ return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
+ }
+
MCPhysReg getStackPointer() const override { return AArch64::SP; }
MCPhysReg getFramePointer() const override { return AArch64::FP; }
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
new file mode 100644
index 0000000000000..de92a5a493e27
--- /dev/null
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -0,0 +1,86 @@
+//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCSymbolizer.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Core/Relocation.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bolt-symbolizer"
+
+namespace llvm {
+namespace bolt {
+
+AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
+
+bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
+ MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
+ bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
+ BinaryContext &BC = Function.getBinaryContext();
+ MCContext *Ctx = BC.Ctx.get();
+
+ // NOTE: the callee may incorrectly set IsBranch.
+ if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
+ return false;
+
+ // TODO: add handling for linker "relaxation". At the moment, relocations
+ // corresponding to "relaxed" instructions are excluded from BinaryFunction
+ // relocation list.
+
+ const uint64_t InstOffset = InstAddress - Function.getAddress();
+ const Relocation *Relocation = Function.getRelocationAt(InstOffset);
+
+ /// Add symbolic operand to the instruction with an optional addend.
+ auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
+ uint64_t RelType) {
+ const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
+ if (Addend)
+ Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
+ *Ctx);
+ Inst.addOperand(MCOperand::createExpr(
+ BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
+ };
+
+ if (Relocation) {
+ addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
+ return true;
+ }
+
+ if (!BC.MIB->hasPCRelOperand(Inst))
+ return false;
+
+ Value += InstAddress;
+ const MCSymbol *TargetSymbol;
+ uint64_t TargetOffset;
+ if (!CreateNewSymbols) {
+ if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
+ TargetSymbol = BD->getSymbol();
+ TargetOffset = Value - BD->getAddress();
+ } else {
+ return false;
+ }
+ } else {
+ std::tie(TargetSymbol, TargetOffset) =
+ BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
+ }
+
+ addOperand(TargetSymbol, TargetOffset, 0);
+
+ return true;
+}
+
+void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
+ int64_t Value,
+ uint64_t Address) {}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
new file mode 100644
index 0000000000000..56ba4fbcaf275
--- /dev/null
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
@@ -0,0 +1,44 @@
+//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
+#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
+
+#include "bolt/Core/BinaryFunction.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+
+namespace llvm {
+namespace bolt {
+
+class AArch64MCSymbolizer : public MCSymbolizer {
+protected:
+ BinaryFunction &Function;
+ bool CreateNewSymbols{true};
+
+public:
+ AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
+ : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
+ Function(Function), CreateNewSymbols(CreateNewSymbols) {}
+
+ AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
+ AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
+ virtual ~AArch64MCSymbolizer();
+
+ bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override;
+
+ void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
+ uint64_t Address) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt
index 8435ea7245e7e..6cf50ae2df9eb 100644
--- a/bolt/lib/Target/AArch64/CMakeLists.txt
+++ b/bolt/lib/Target/AArch64/CMakeLists.txt
@@ -18,6 +18,7 @@ endif()
add_llvm_library(LLVMBOLTTargetAArch64
AArch64MCPlusBuilder.cpp
+ AArch64MCSymbolizer.cpp
NO_EXPORT
DISABLE_LLVM_LINK_LLVM_DYLIB
>From ffd70837664ec284a848ae3cb5e37586278493ec Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Fri, 21 Feb 2025 17:13:17 -0800
Subject: [PATCH 2/3] fixup! [BOLT] Add symbolizer for AArch64 disassembler.
NFCI
---
bolt/lib/Core/BinaryFunction.cpp | 16 ++++++++++++++++
bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp | 1 -
bolt/lib/Target/AArch64/CMakeLists.txt | 1 +
.../AArch64/Disassembler/AArch64Disassembler.cpp | 5 ++++-
4 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index f64f2a3591061..f4058c44d5463 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1327,6 +1327,22 @@ Error BinaryFunction::disassemble() {
BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
BC.errs() << '\n';
}
+
+ // Verify that we've symbolized an operand if the instruction has a
+ // relocation against it.
+ if (getRelocationInRange(Offset, Offset + Size)) {
+ bool HasSymbolicOp = false;
+ for (MCOperand &Op : Instruction) {
+ if (Op.isExpr()) {
+ HasSymbolicOp = true;
+ break;
+ }
+ }
+ if (!HasSymbolicOp)
+ return createFatalBOLTError(
+ "expected symbolized operand for instruction at 0x" +
+ Twine::utohexstr(AbsoluteInstrAddr));
+ }
}
// Special handling for AVX-512 instructions.
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
index de92a5a493e27..7145e77a1edbb 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -12,7 +12,6 @@
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/Relocation.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "bolt-symbolizer"
diff --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt
index 6cf50ae2df9eb..cb38117de659e 100644
--- a/bolt/lib/Target/AArch64/CMakeLists.txt
+++ b/bolt/lib/Target/AArch64/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LLVM_LINK_COMPONENTS
MC
+ MCDisassembler
Support
AArch64Desc
)
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 8b1c16d319b2c..f78e6926f2c7d 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
Inst.getOpcode() == AArch64::MOVKXi)
Inst.addOperand(Inst.getOperand(0));
- Inst.addOperand(MCOperand::createImm(imm));
+ if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
+ 0, 4))
+ Inst.addOperand(MCOperand::createImm(imm));
+
Inst.addOperand(MCOperand::createImm(shift));
return Success;
}
>From e2927ce1289677d7080035d001287a83a69bb336 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Fri, 21 Feb 2025 17:17:58 -0800
Subject: [PATCH 3/3] fixup! [BOLT] Add symbolizer for AArch64 disassembler.
NFCI
---
bolt/lib/Core/BinaryFunction.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index f4058c44d5463..0b52fb434c60d 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1575,8 +1575,9 @@ bool BinaryFunction::scanExternalRefs() {
assert(FunctionData.size() == getMaxSize() &&
"function size does not match raw data size");
- BC.SymbolicDisAsm->setSymbolizer(
- BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
+ if (BC.isX86())
+ BC.SymbolicDisAsm->setSymbolizer(
+ BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
// Disassemble contents of the function. Detect code entry points and create
// relocations for references to code that will be moved.
More information about the llvm-commits
mailing list