[llvm] [BOLT][RISCV]Fix handling of GOT relocation pairs (PR #149658)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 19 09:46:02 PDT 2025
https://github.com/dinyy updated https://github.com/llvm/llvm-project/pull/149658
>From 209456848004a57fa992e640578c17404a2203f8 Mon Sep 17 00:00:00 2001
From: yjn <1076891326 at qq.com>
Date: Sat, 19 Jul 2025 23:38:47 +0800
Subject: [PATCH 1/2] [BOLT][RISCV]fix up GOT Relocation Handling
---
bolt/include/bolt/Core/MCPlusBuilder.h | 31 +++++-
bolt/lib/Core/BinaryFunction.cpp | 104 +++++++++++++------
bolt/lib/Passes/FixRISCVCallsPass.cpp | 34 ++++++
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 36 +++++++
bolt/test/RISCV/reloc-got.s | 18 +++-
5 files changed, 189 insertions(+), 34 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c43cd1d..a4d8c4c0d1f64 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -192,6 +192,12 @@ class MCPlusBuilder {
SrcInst.erase(AnnotationOp, SrcInst.end());
}
+ /// Copy annotations from \p SrcInst to \p DstInst.
+ void copyAnnotations (MCInst &SrcInst, MCInst &DstInst) const {
+ MCInst::iterator AnnotationOp = getAnnotationInstOp(SrcInst);
+ for (MCInst::iterator Iter = AnnotationOp; Iter != SrcInst.end(); ++Iter)
+ DstInst.addOperand(*Iter);
+ }
/// Return iterator range covering def operands.
iterator_range<MCInst::iterator> defOperands(MCInst &Inst) const {
@@ -839,6 +845,29 @@ class MCPlusBuilder {
return StringRef();
}
+ /// Returns the base register used by the instruction.
+ virtual unsigned getBaseReg(const MCInst &Inst) const{
+ llvm_unreachable("not implemented");
+ return 0;
+ }
+
+ /// Matches a pair of instructions that implement a GOT load:
+ /// an AUIPC (loading the high part of the address)
+ /// followed by a GOT-loading instruction (loading the low part of the address).
+ virtual bool matchGotAuipcPair(const MCInst &Inst) const{
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
+ /// Try to find a symbol referenced by a PC-relative LO (low 12 bits)
+ // relocation in the instruction.
+ virtual const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const{
+ llvm_unreachable("not implemented");
+ return nullptr;
+ }
+
+
+
/// Interface and basic functionality of a MCInstMatcher. The idea is to make
/// it easy to match one or more MCInsts against a tree-like pattern and
/// extract the fragment operands. Example:
@@ -2303,7 +2332,7 @@ class MCPlusBuilder {
// We have to use at least 2-byte alignment for functions because of C++
// ABI.
return 2;
- }
+ }
// AliasMap caches a mapping of registers to the set of registers that
// alias (are sub or superregs of itself, including itself).
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..5d849f0fe3752 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1457,12 +1457,42 @@ Error BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
- } else if (BC.isRISCV()) {
- // Check if there's a relocation associated with this instruction.
- for (auto Itr = Relocations.lower_bound(Offset),
- ItrE = Relocations.lower_bound(Offset + Size);
+ }
+
+add_instruction:
+ if (getDWARFLineTable()) {
+ Instruction.setLoc(findDebugLineInformationForInstructionAt(
+ AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
+ }
+
+ // Record offset of the instruction for profile matching.
+ if (BC.keepOffsetForInstruction(Instruction))
+ MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
+
+ if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
+ // NOTE: disassembly loses the correct size information for noops on x86.
+ // E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
+ // 5 bytes. Preserve the size info using annotations.
+ MIB->setSize(Instruction, Size);
+ }
+
+ addInstruction(Offset, std::move(Instruction));
+ }
+ if(BC.isRISCV()){
+ for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) {
+ uint64_t CurOffset = CurInstrIt->first;
+ if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue;
+
+ if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue;
+ if (MIB->isPseudo(CurInstrIt->second)) continue;
+ if (isZeroPaddingAt(CurInstrIt->first)) continue;
+
+ auto NextInstrIt = std::next(CurInstrIt);
+ uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize();
+ for (auto Itr = Relocations.lower_bound(CurOffset),
+ ItrE = Relocations.lower_bound(NextOffset);
Itr != ItrE; ++Itr) {
- const Relocation &Relocation = Itr->second;
+ Relocation &Relocation = Itr->second;
MCSymbol *Symbol = Relocation.Symbol;
if (Relocation::isInstructionReference(Relocation.Type)) {
@@ -1484,35 +1514,51 @@ Error BinaryFunction::disassemble() {
if (Relocation::isGOT(Relocation.Type)) {
assert(Relocation::isPCRelative(Relocation.Type) &&
"GOT relocation must be PC-relative on RISC-V");
+ // For RISC-V, we need to find the next instruction
+ // that matches the current instruction's base register.
+ auto NextInstrIt = std::next(CurInstrIt);
+ unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second);
+ while (NextInstrIt != Instructions.end()) {
+ MCInst &NextInst = NextInstrIt->second;
+ unsigned NextReg = BC.MIB->getBaseReg(NextInst);
+ // some case there exit extra auipc instruction
+ // like auipc+auipc+ld instruction,so we need skip it
+ if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) {
+ break;
+ }
+ if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){
+
+ int64_t CurImm = 0;
+ for (const MCOperand &Op : CurInstrIt->second) {
+ if (Op.isImm()) {
+ CurImm = Op.getImm();
+ break;
+ }
+ }
+ int64_t NextImm = 0;
+ for (const MCOperand &Op : NextInstrIt->second) {
+ if (Op.isImm()) {
+ NextImm = Op.getImm();
+ break;
+ }
+ }
+ Relocation.Value = (CurImm << 12) + NextImm;
+ break;
+ }
+ NextInstrIt = std::next(NextInstrIt);
+ }
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
Addend = Relocation.Value + Relocation.Offset + getAddress();
- }
- int64_t Value = Relocation.Value;
- const bool Result = BC.MIB->replaceImmWithSymbolRef(
- Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
- (void)Result;
- assert(Result && "cannot replace immediate with relocation");
- }
- }
-add_instruction:
- if (getDWARFLineTable()) {
- Instruction.setLoc(findDebugLineInformationForInstructionAt(
- AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable()));
- }
-
- // Record offset of the instruction for profile matching.
- if (BC.keepOffsetForInstruction(Instruction))
- MIB->setOffset(Instruction, static_cast<uint32_t>(Offset));
+ }
+ int64_t Value = Relocation.Value;
+ const bool Result = BC.MIB->replaceImmWithSymbolRef(
+ CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
+ (void)Result;
+ assert(Result && "cannot replace immediate with relocation");
- if (BC.isX86() && BC.MIB->isNoop(Instruction)) {
- // NOTE: disassembly loses the correct size information for noops on x86.
- // E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only
- // 5 bytes. Preserve the size info using annotations.
- MIB->setSize(Instruction, Size);
+ }
}
-
- addInstruction(Offset, std::move(Instruction));
}
for (auto [Offset, Label] : InstructionLabels) {
diff --git a/bolt/lib/Passes/FixRISCVCallsPass.cpp b/bolt/lib/Passes/FixRISCVCallsPass.cpp
index 9011ef303a80e..a756224432267 100644
--- a/bolt/lib/Passes/FixRISCVCallsPass.cpp
+++ b/bolt/lib/Passes/FixRISCVCallsPass.cpp
@@ -20,6 +20,40 @@ void FixRISCVCallsPass::runOnFunction(BinaryFunction &BF) {
auto &BC = BF.getBinaryContext();
auto &MIB = BC.MIB;
auto *Ctx = BC.Ctx.get();
+ // Since JitLink currently only supports adjacent AUIPC/LO12 pairs,
+ // we must guarantee that the label for the AUIPC is
+ // immediately before the LO12 instruction.
+ for (auto &BB : BF) {
+ for (auto II = BB.begin(); II != BB.end(); ) {
+ const MCInst &AInst = *II;
+ const MCSymbol *TargetSym = nullptr;
+ if (MIB->isPseudo(*II)) {
+ ++II;
+ continue;
+ }
+ TargetSym = MIB->getPCRelLoSymbol(AInst);
+ if (TargetSym) {
+ auto BI = II;
+ bool found = false;
+ while(BI != BB.begin()){
+ auto *Label = MIB->getInstLabel(*BI);
+ if (Label && Label == TargetSym) {
+ foundSymbol = true;
+ break;
+ }
+ BI--;
+ }
+ if (foundSymbol && std::next(BI) != II) {
+ MCInst SavedInst = *BI;
+ MIB->copyAnnotations(*BI, SavedInst);
+ BB.eraseInstruction(BI);
+ II = BB.insertInstruction(--II, std::move(SavedInst));
+ II++;
+ }
+ }
+ ++II;
+ }
+ }
for (auto &BB : BF) {
for (auto II = BB.begin(); II != BB.end();) {
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 10b4913b6ab7f..2fdac8a3d0ff2 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -339,6 +339,42 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
}
}
+ unsigned getBaseReg(const MCInst &Inst) const override{
+ switch (Inst.getOpcode()) {
+ default:
+ return 0;
+ case RISCV::AUIPC:
+ return Inst.getOperand(0).getReg();
+ case RISCV::ADDI:
+ case RISCV::LD:
+ return Inst.getOperand(1).getReg();
+ }
+ }
+
+ bool matchGotAuipcPair(const MCInst &Inst) const override{
+ return Inst.getOpcode() == RISCV::ADDI ||
+ Inst.getOpcode() == RISCV::LD ||
+ Inst.getOpcode() == RISCV::C_JAL ||
+ Inst.getOpcode() == RISCV::C_BEQZ ||
+ Inst.getOpcode() == RISCV::C_BNEZ;
+ }
+
+
+const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const override {
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const auto &Op = Inst.getOperand(i);
+ if (!Op.isExpr())
+ continue;
+ const MCExpr *Expr = Op.getExpr();
+ auto *Spec = llvm::dyn_cast<MCSpecifierExpr>(Expr);
+ if (Spec && Spec->getSpecifier() == RISCV::S_PCREL_LO) {
+ return getTargetSymbol(Spec->getSubExpr());
+ }
+ }
+ return nullptr;
+}
+
+
const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
auto *RISCVExpr = dyn_cast<MCSpecifierExpr>(Expr);
if (RISCVExpr && RISCVExpr->getSubExpr())
diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s
index e7e85fddfb1cb..7c8c7c81238be 100644
--- a/bolt/test/RISCV/reloc-got.s
+++ b/bolt/test/RISCV/reloc-got.s
@@ -1,5 +1,5 @@
// RUN: %clang %cflags -o %t %s
-// RUN: llvm-bolt --print-cfg --print-only=_start -o %t.null %t \
+// RUN: llvm-bolt --print-finalized --print-only=_start -o %t.null %t \
// RUN: | FileCheck %s
.data
@@ -8,16 +8,26 @@
d:
.dword 0
+ .globl e
+ .p2align 3
+e:
+ .dword 0
+
.text
.globl _start
.p2align 1
// CHECK: Binary Function "_start" after building cfg {
_start:
nop // Here to not make the _start and .Ltmp0 symbols coincide
-// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
-// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+ // CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
+ // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+ // CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1
+ // CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1)
1:
auipc t0, %got_pcrel_hi(d)
+2:
+ auipc t1, %got_pcrel_hi(e)
ld t0, %pcrel_lo(1b)(t0)
+ ld t1, %pcrel_lo(2b)(t1)
ret
- .size _start, .-_start
+ .size _start, .-_start
\ No newline at end of file
>From f79a815594a86ffc8ff3311730bf02c64d2fb5f7 Mon Sep 17 00:00:00 2001
From: yjn <1076891326 at qq.com>
Date: Sun, 20 Jul 2025 00:45:42 +0800
Subject: [PATCH 2/2] [BOLT][RISCV]fix up GOT Relocation Handling
---
bolt/lib/Passes/FixRISCVCallsPass.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/lib/Passes/FixRISCVCallsPass.cpp b/bolt/lib/Passes/FixRISCVCallsPass.cpp
index a756224432267..25fef18727f1e 100644
--- a/bolt/lib/Passes/FixRISCVCallsPass.cpp
+++ b/bolt/lib/Passes/FixRISCVCallsPass.cpp
@@ -34,7 +34,7 @@ void FixRISCVCallsPass::runOnFunction(BinaryFunction &BF) {
TargetSym = MIB->getPCRelLoSymbol(AInst);
if (TargetSym) {
auto BI = II;
- bool found = false;
+ bool foundSymbol = false;
while(BI != BB.begin()){
auto *Label = MIB->getInstLabel(*BI);
if (Label && Label == TargetSym) {
More information about the llvm-commits
mailing list